diff --git a/.gitignore b/.gitignore index 7383284..40ee406 100644 --- a/.gitignore +++ b/.gitignore @@ -275,4 +275,5 @@ conf/* apps/worker/celerybeat-schedule.db /apps/api/conf/* !/apps/api/conf/ -!/apps/api/conf/.gitkeep \ No newline at end of file +!/apps/api/conf/.gitkeep +.idea/FastFetchBot.iml diff --git a/AGENTS.md b/AGENTS.md index b4c6c9c..2008c56 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -205,6 +205,15 @@ MongoDB models and connection logic live in `packages/shared/fastfetchbot_shared - On subsequent cache hits, `media_files_packaging` uses stored file_ids directly via `InputMediaPhoto(file_id)` etc., skipping HTTP download entirely - The bot has no direct MongoDB access — all database writes go through the async worker via Redis +**Future media asset caching plan** — target architecture for decoupling external media handling from the Telegram bot: +- Current bot-side video probing with `ffprobe` is only a workaround for Telegram iOS aspect-ratio rendering issues. It should not grow into a full media processing pipeline inside `apps/telegram-bot` +- Move external media download, video probing (`width`, `height`, `duration`), and any future remux/transcode work into the worker layer (`apps/worker` for synchronous heavy I/O, coordinated by API/async-worker as needed) +- Introduce a durable `MediaAsset` cache backed by MongoDB metadata plus object storage such as S3. Suggested fields: source URL/hash, media type, MIME type, file size, storage key, width, height, duration, processing status, error details, expiration/lifecycle fields, and `telegram_file_id` +- Let the bot consume prepared media only: prefer `telegram_file_id` first; otherwise use a ready local/object-storage asset with known dimensions; avoid downloading or probing arbitrary remote media in the bot event loop +- Keep Telegram `file_id` as the highest-priority cache because it avoids both external downloads and object-storage reads on repeated sends +- Use object storage primarily for first sends, unstable external sources, sources requiring cookies/referer, and cases where media metadata must be known before Telegram upload +- Add lifecycle cleanup and deduplication by canonical source URL/content hash before enabling long-term object storage caching + **SQLite/PostgreSQL** (user settings — always enabled for the Telegram bot): | Variable | Default | Description | |----------|---------|-------------| diff --git a/apps/async-worker/async_worker/services/file_id_consumer.py b/apps/async-worker/async_worker/services/file_id_consumer.py index 60db77e..61e1e7c 100644 --- a/apps/async-worker/async_worker/services/file_id_consumer.py +++ b/apps/async-worker/async_worker/services/file_id_consumer.py @@ -5,10 +5,12 @@ from async_worker.config import settings from fastfetchbot_shared.utils.logger import logger +from fastfetchbot_shared.utils.number import positive_int FILEID_QUEUE_KEY = "fileid:updates" FILEID_DLQ_KEY = "fileid:updates:dlq" _MAX_RETRIES = 3 +VIDEO_METADATA_FIELDS = ("width", "height", "duration") _redis: aioredis.Redis | None = None _consumer_task: asyncio.Task | None = None @@ -44,7 +46,9 @@ async def _consume_loop() -> None: await r.lpush(FILEID_QUEUE_KEY, raw_payload) logger.info("Requeued in-flight payload before shutdown") except Exception: - logger.warning(f"Failed to requeue payload on shutdown: {raw_payload}") + logger.warning( + f"Failed to requeue payload on shutdown: {raw_payload}" + ) logger.info("file_id consumer cancelled, shutting down") break except json.JSONDecodeError as e: @@ -70,11 +74,15 @@ async def _consume_loop() -> None: try: # Stamp retry count so we can detect repeated failures. payload["_retry_count"] = retry_count + 1 - await r.lpush(FILEID_QUEUE_KEY, json.dumps(payload, ensure_ascii=False)) + await r.lpush( + FILEID_QUEUE_KEY, json.dumps(payload, ensure_ascii=False) + ) except Exception: logger.warning(f"Failed to requeue payload: {raw_payload}") else: - logger.error(f"Max retries ({_MAX_RETRIES}) exceeded, moving to DLQ: {raw_payload}") + logger.error( + f"Max retries ({_MAX_RETRIES}) exceeded, moving to DLQ: {raw_payload}" + ) try: await r.lpush(FILEID_DLQ_KEY, raw_payload) except Exception: @@ -93,9 +101,12 @@ async def _process_file_id_update(payload: dict) -> None: logger.warning(f"Invalid file_id update payload: {payload}") return - doc = await Metadata.find( - Metadata.url == metadata_url - ).sort("-version").limit(1).first_or_none() + doc = ( + await Metadata.find(Metadata.url == metadata_url) + .sort("-version") + .limit(1) + .first_or_none() + ) if doc is None or not doc.media_files: logger.warning(f"No metadata found for file_id update: {metadata_url}") @@ -104,9 +115,20 @@ async def _process_file_id_update(payload: dict) -> None: matched = 0 for update in updates: for mf in doc.media_files: - if mf.url == update["url"] and mf.telegram_file_id is None: - mf.telegram_file_id = update["telegram_file_id"] - matched += 1 + if mf.url == update["url"]: + changed = False + if mf.telegram_file_id is None: + mf.telegram_file_id = update["telegram_file_id"] + changed = True + if update.get("media_type") == "video": + for field in VIDEO_METADATA_FIELDS: + value = positive_int(update.get(field)) + if value is not None and getattr(mf, field, None) != value: + setattr(mf, field, value) + changed = True + if changed: + matched += 1 + break if matched > 0: await doc.save() diff --git a/apps/telegram-bot/Dockerfile b/apps/telegram-bot/Dockerfile index 7cf638b..21078f9 100644 --- a/apps/telegram-bot/Dockerfile +++ b/apps/telegram-bot/Dockerfile @@ -25,6 +25,7 @@ COPY --from=ghcr.io/astral-sh/uv:0.10.4 /uv /usr/local/bin/uv RUN apt-get update \ && apt-get install --no-install-recommends -y \ libmagic1 \ + ffmpeg \ build-essential # copy workspace files for dependency resolution @@ -42,7 +43,8 @@ FROM python-base AS production ENV PYTHONPATH=/app/apps/telegram-bot:$PYTHONPATH RUN apt-get update \ && apt-get install --no-install-recommends -y \ - libmagic1 + libmagic1 \ + ffmpeg COPY --from=builder-base $PYSETUP_PATH $PYSETUP_PATH COPY packages/ /app/packages/ COPY apps/telegram-bot/ /app/apps/telegram-bot/ diff --git a/apps/telegram-bot/core/services/file_id_capture.py b/apps/telegram-bot/core/services/file_id_capture.py index 0f04779..de4a33b 100644 --- a/apps/telegram-bot/core/services/file_id_capture.py +++ b/apps/telegram-bot/core/services/file_id_capture.py @@ -6,8 +6,10 @@ from core.config import settings from fastfetchbot_shared.utils.logger import logger +from fastfetchbot_shared.utils.number import positive_int FILEID_QUEUE_KEY = "fileid:updates" +VIDEO_METADATA_FIELDS = ("width", "height", "duration") _redis: aioredis.Redis | None = None @@ -34,6 +36,18 @@ def extract_file_id(message: Message, media_type: str) -> Optional[str]: return None +def _video_metadata_for_update(info: dict, message: Message) -> dict[str, int]: + metadata = {} + video = getattr(message, "video", None) + for field in VIDEO_METADATA_FIELDS: + value = positive_int(info.get(field)) + if value is None and video: + value = positive_int(getattr(video, field, None)) + if value is not None: + metadata[field] = value + return metadata + + async def capture_and_push_file_ids( uncached_info: list[dict], sent_messages: tuple[Message, ...], @@ -57,21 +71,27 @@ async def capture_and_push_file_ids( break file_id = extract_file_id(sent_messages[i], info["media_type"]) if file_id: - file_id_updates.append({ + update = { "url": info["url"], "media_type": info["media_type"], "telegram_file_id": file_id, - }) + } + if info["media_type"] == "video": + update.update(_video_metadata_for_update(info, sent_messages[i])) + file_id_updates.append(update) if not file_id_updates: return try: r = await _get_redis() - payload = json.dumps({ - "metadata_url": metadata_url, - "file_id_updates": file_id_updates, - }, ensure_ascii=False) + payload = json.dumps( + { + "metadata_url": metadata_url, + "file_id_updates": file_id_updates, + }, + ensure_ascii=False, + ) await r.lpush(FILEID_QUEUE_KEY, payload) logger.info(f"Pushed {len(file_id_updates)} file_id updates for {metadata_url}") except Exception: diff --git a/apps/telegram-bot/core/services/message_sender.py b/apps/telegram-bot/core/services/message_sender.py index 9f3bf4a..6a13050 100644 --- a/apps/telegram-bot/core/services/message_sender.py +++ b/apps/telegram-bot/core/services/message_sender.py @@ -30,6 +30,11 @@ TELEGRAM_FILE_UPLOAD_LIMIT_LOCAL_API, TEMPLATE_TRANSLATION, ) +from core.services.video_metadata import ( + ensure_video_metadata, + video_input_kwargs, + video_metadata_from_mapping, +) environment = JINJA2_ENV template = environment.get_template("social_media_message.jinja2") @@ -41,6 +46,7 @@ def _get_application(): """Lazy import to avoid circular dependency.""" from core.services.bot_app import application + return application @@ -51,8 +57,10 @@ def _log_file_id_task_exception(task: asyncio.Task): async def send_item_message( - data: dict, chat_id: Union[int, str] = None, message: Message = None, - message_id: int = None, + data: dict, + chat_id: Union[int, str] = None, + message: Message = None, + message_id: int = None, ) -> None: """ :param data: (dict) metadata of the item @@ -67,7 +75,7 @@ async def send_item_message( if not chat_id and not message: raise ValueError("must provide chat_id or message") if ( - not chat_id + not chat_id ) and message: # this function supports direct reply to a message even if the chat_id is None chat_id = message.chat.id discussion_chat_id = chat_id @@ -80,11 +88,11 @@ async def send_item_message( if len(data["media_files"]) > 0: # if the message type is short and there are some media files, send media group reply_to_message_id = None - media_message_group, file_message_group, uncached_media_info = await media_files_packaging( - media_files=data["media_files"], data=data + media_message_group, file_message_group, uncached_media_info = ( + await media_files_packaging(media_files=data["media_files"], data=data) ) if ( - len(media_message_group) > 0 + len(media_message_group) > 0 ): # if there are some media groups to send, send it all_sent_messages = [] for i, media_group in enumerate(media_message_group): @@ -110,11 +118,18 @@ async def send_item_message( reply_to_message_id = sent_media_files_message[0].message_id elif sent_media_files_message is Message: reply_to_message_id = sent_media_files_message.message_id - logger.debug(f"sent media files message: {sent_media_files_message}") + logger.debug( + f"sent media files message: {sent_media_files_message}" + ) # Background file_id capture: extract file_ids from sent messages has_uncached = any(info is not None for info in uncached_media_info) - if settings.SCRAPE_MODE == "queue" and has_uncached and all_sent_messages: + if ( + settings.SCRAPE_MODE == "queue" + and has_uncached + and all_sent_messages + ): from core.services.file_id_capture import capture_and_push_file_ids + task = asyncio.create_task( capture_and_push_file_ids( uncached_info=uncached_media_info, @@ -129,9 +144,9 @@ async def send_item_message( text=caption_text, parse_mode=ParseMode.HTML, reply_to_message_id=_reply_to, - disable_web_page_preview=True - if data["message_type"] == MessageType.SHORT - else False, + disable_web_page_preview=( + True if data["message_type"] == MessageType.SHORT else False + ), disable_notification=True, ) if discussion_chat_id != chat_id: @@ -145,22 +160,24 @@ async def send_item_message( logger.debug(f"the pinned message: {pinned_message}") if len(media_message_group) > 0: if ( - pinned_message.forward_origin.message_id - == sent_media_files_message[-1].message_id + pinned_message.forward_origin.message_id + == sent_media_files_message[-1].message_id ): reply_to_message_id = ( - group_chat.pinned_message.id - - len(sent_media_files_message) - + 1 + group_chat.pinned_message.id + - len(sent_media_files_message) + + 1 ) else: reply_to_message_id = group_chat.pinned_message.id + 1 - elif pinned_message.forward_origin.message_id == sent_message.message_id: + elif ( + pinned_message.forward_origin.message_id == sent_message.message_id + ): reply_to_message_id = group_chat.pinned_message.id else: reply_to_message_id = group_chat.pinned_message.id + 1 if ( - len(file_message_group) > 0 + len(file_message_group) > 0 ): # to send files, the files messages should be replied to the message sent before logger.debug(f"reply_to_message_id: {reply_to_message_id}") for file_group in file_message_group: @@ -178,9 +195,9 @@ async def send_item_message( text=caption_text, parse_mode=ParseMode.HTML, reply_to_message_id=_reply_to, - disable_web_page_preview=True - if data["message_type"] == "short" - else False, + disable_web_page_preview=( + True if data["message_type"] == "short" else False + ), disable_notification=True, ) except Exception: @@ -191,6 +208,7 @@ async def send_item_message( async def send_debug_channel(message: str) -> None: import html as html_module from core.config import TELEBOT_DEBUG_CHANNEL + application = _get_application() if TELEBOT_DEBUG_CHANNEL is not None: await application.bot.send_message( @@ -231,7 +249,7 @@ async def media_files_packaging(media_files: list, data: dict) -> tuple: media_message_group, media_group, file_message_group, file_group = [], [], [], [] uncached_media_info = [] for ( - media_item + media_item ) in media_files: # To traverse all media items in the media files list # check if we need to create a new media group if media_counter == TELEGRAM_SINGLE_MESSAGE_MEDIA_LIMIT: @@ -253,11 +271,15 @@ async def media_files_packaging(media_files: list, data: dict) -> tuple: elif media_type == "gif": media_group.append(InputMediaAnimation(file_id)) elif media_type == "video": - media_group.append(InputMediaVideo(file_id, supports_streaming=True)) + media_group.append( + InputMediaVideo(file_id, **video_input_kwargs(media_item)) + ) elif media_type == "audio": media_group.append(InputMediaAudio(file_id)) elif media_type == "document": - file_group.append(InputMediaDocument(file_id, parse_mode=ParseMode.HTML)) + file_group.append( + InputMediaDocument(file_id, parse_mode=ParseMode.HTML) + ) file_counter += 1 uncached_media_info.append(None) media_counter += 1 @@ -266,8 +288,8 @@ async def media_files_packaging(media_files: list, data: dict) -> tuple: ) continue if not ( - media_item["media_type"] in ["image", "gif", "video"] - and data["message_type"] == "long" + media_item["media_type"] in ["image", "gif", "video"] + and data["message_type"] == "long" ): # check the url validity url_parser = urlparse(media_item["url"]) @@ -298,7 +320,7 @@ async def media_files_packaging(media_files: list, data: dict) -> tuple: continue # check the file size if ( - not settings.TELEBOT_API_SERVER + not settings.TELEBOT_API_SERVER ): # the official telegram bot api server only supports 50MB file if file_size > TELEGRAM_FILE_UPLOAD_LIMIT: # if the size is over 50MB, skip this file @@ -322,10 +344,13 @@ async def media_files_packaging(media_files: list, data: dict) -> tuple: ) # don't try to resize image if the ratio is too large if ( - ratio < 5 - or max(img_height, img_width) < settings.TELEGRAM_IMAGE_DIMENSION_LIMIT + ratio < 5 + or max(img_height, img_width) + < settings.TELEGRAM_IMAGE_DIMENSION_LIMIT ): - image = image_compressing(image, settings.TELEGRAM_IMAGE_DIMENSION_LIMIT) + image = image_compressing( + image, settings.TELEGRAM_IMAGE_DIMENSION_LIMIT + ) with BytesIO() as buffer: # mime_type file format image.save(buffer, format=ext) @@ -342,9 +367,9 @@ async def media_files_packaging(media_files: list, data: dict) -> tuple: f"image size: {file_size}, ratio: {ratio}, width: {img_width}, height: {img_height}" ) if ( - file_size > settings.TELEGRAM_IMAGE_SIZE_LIMIT - or img_width > settings.TELEGRAM_IMAGE_DIMENSION_LIMIT - or img_height > settings.TELEGRAM_IMAGE_DIMENSION_LIMIT + file_size > settings.TELEGRAM_IMAGE_SIZE_LIMIT + or img_width > settings.TELEGRAM_IMAGE_DIMENSION_LIMIT + or img_height > settings.TELEGRAM_IMAGE_DIMENSION_LIMIT ) and data["category"] not in ["xiaohongshu"]: try: io_object = await download_file_by_metadata_item( @@ -374,7 +399,10 @@ async def media_files_packaging(media_files: list, data: dict) -> tuple: io_object.name = io_object.name + ".gif" media_group.append(InputMediaAnimation(io_object)) elif media_item["media_type"] == "video": - media_group.append(InputMediaVideo(io_object, supports_streaming=True)) + await ensure_video_metadata(media_item, io_object) + media_group.append( + InputMediaVideo(io_object, **video_input_kwargs(media_item)) + ) # TODO: not have any services to store audio files for now, just a placeholder elif media_item["media_type"] == "audio": media_group.append(InputMediaAudio(io_object)) @@ -383,10 +411,13 @@ async def media_files_packaging(media_files: list, data: dict) -> tuple: InputMediaDocument(io_object, parse_mode=ParseMode.HTML) ) file_counter += 1 - uncached_media_info.append({ + uncached_item = { "url": media_item["url"], "media_type": media_item["media_type"], - }) + } + if media_item["media_type"] == "video": + uncached_item.update(video_metadata_from_mapping(media_item)) + uncached_media_info.append(uncached_item) media_counter += 1 logger.info( f"get the {media_counter}th media item,type: {media_item['media_type']}, url: {media_item['url']}" diff --git a/apps/telegram-bot/core/services/video_metadata.py b/apps/telegram-bot/core/services/video_metadata.py new file mode 100644 index 0000000..8928ec7 --- /dev/null +++ b/apps/telegram-bot/core/services/video_metadata.py @@ -0,0 +1,152 @@ +import asyncio +import json +import os +import shutil +import tempfile + +from fastfetchbot_shared.models.classes import NamedBytesIO +from fastfetchbot_shared.utils.logger import logger +from fastfetchbot_shared.utils.number import positive_int + +VIDEO_METADATA_FIELDS = ("width", "height", "duration") +VIDEO_METADATA_PROBE_CONCURRENCY = 2 +VIDEO_TEMP_FILE_CHUNK_SIZE = 1024 * 1024 +_video_metadata_probe_semaphore = asyncio.Semaphore(VIDEO_METADATA_PROBE_CONCURRENCY) + + +def video_metadata_from_mapping(media_item: dict) -> dict[str, int]: + metadata = {} + for field in VIDEO_METADATA_FIELDS: + value = positive_int(media_item.get(field)) + if value is not None: + metadata[field] = value + return metadata + + +def video_input_kwargs(media_item: dict) -> dict: + return { + "supports_streaming": True, + **video_metadata_from_mapping(media_item), + } + + +def _video_rotation(stream: dict) -> int: + tags = stream.get("tags") or {} + rotation = tags.get("rotate") + if rotation is None: + for side_data in stream.get("side_data_list") or []: + if "rotation" in side_data: + rotation = side_data["rotation"] + break + try: + value = positive_int(abs(float(rotation))) if rotation is not None else None + except (TypeError, ValueError): + value = None + return value or 0 + + +def _write_video_temp_file( + io_object: NamedBytesIO, + temp_file, + original_position: int, +) -> None: + try: + io_object.seek(0) + while chunk := io_object.read(VIDEO_TEMP_FILE_CHUNK_SIZE): + temp_file.write(chunk) + temp_file.flush() + finally: + io_object.seek(original_position) + + +async def probe_video_metadata(io_object: NamedBytesIO) -> dict[str, int]: + """Read video dimensions from the file so Telegram iOS receives explicit aspect data.""" + # TODO: This bot-side ffprobe path is a workaround for Telegram iOS aspect ratio bugs. + # Move media download/probing into the worker-backed MediaAsset cache instead. + ffprobe = shutil.which("ffprobe") + if not ffprobe: + logger.debug( + "ffprobe is not available; sending video without probed dimensions" + ) + return {} + + file_name = str(getattr(io_object, "name", "") or "") + suffix = os.path.splitext(file_name)[1] or ".mp4" + original_position = io_object.tell() + + async with _video_metadata_probe_semaphore: + try: + with tempfile.NamedTemporaryFile(suffix=suffix) as temp_file: + await asyncio.to_thread( + _write_video_temp_file, + io_object, + temp_file, + original_position, + ) + + process = await asyncio.create_subprocess_exec( + ffprobe, + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=width,height,duration:stream_tags=rotate:stream_side_data=rotation", + "-of", + "json", + temp_file.name, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await process.communicate() + + if process.returncode != 0: + logger.warning( + f"ffprobe failed for {file_name or 'video'}: {stderr.decode(errors='ignore')}" + ) + return {} + + data = json.loads(stdout.decode() or "{}") + streams = data.get("streams") or [] + if not streams: + return {} + + stream = streams[0] + width = positive_int(stream.get("width")) + height = positive_int(stream.get("height")) + if width and height and _video_rotation(stream) % 180 == 90: + width, height = height, width + + metadata = {} + if width is not None: + metadata["width"] = width + if height is not None: + metadata["height"] = height + duration = positive_int(stream.get("duration")) + if duration is not None: + metadata["duration"] = duration + return metadata + except Exception: + logger.exception( + f"Failed to probe video metadata for {file_name or 'video'}" + ) + return {} + finally: + try: + io_object.seek(original_position) + except Exception: + logger.exception( + "Failed to restore video stream position after probing" + ) + + +async def ensure_video_metadata(media_item: dict, io_object: NamedBytesIO) -> None: + if media_item["media_type"] != "video": + return + metadata = video_metadata_from_mapping(media_item) + if all(field in metadata for field in VIDEO_METADATA_FIELDS): + return + probed_metadata = await probe_video_metadata(io_object) + for field, value in probed_metadata.items(): + if positive_int(media_item.get(field)) is None: + media_item[field] = value diff --git a/packages/shared/fastfetchbot_shared/models/metadata_item.py b/packages/shared/fastfetchbot_shared/models/metadata_item.py index 2613802..0307237 100644 --- a/packages/shared/fastfetchbot_shared/models/metadata_item.py +++ b/packages/shared/fastfetchbot_shared/models/metadata_item.py @@ -49,6 +49,9 @@ class MediaFile: original_url: Optional[str] = None caption: Optional[str] = None telegram_file_id: Optional[str] = None + width: Optional[int] = None + height: Optional[int] = None + duration: Optional[int] = None @staticmethod def from_dict(obj: Any) -> "MediaFile": @@ -57,7 +60,18 @@ def from_dict(obj: Any) -> "MediaFile": url = from_str(obj.get("url")) caption = from_str(obj.get("caption")) telegram_file_id = obj.get("telegram_file_id") - return MediaFile(media_type, url, caption=caption, telegram_file_id=telegram_file_id) + width = from_optional_int(obj.get("width")) + height = from_optional_int(obj.get("height")) + duration = from_optional_int(obj.get("duration")) + return MediaFile( + media_type, + url, + caption=caption, + telegram_file_id=telegram_file_id, + width=width, + height=height, + duration=duration, + ) def to_dict(self) -> dict: result: dict = {} @@ -66,6 +80,15 @@ def to_dict(self) -> dict: result["caption"] = self.caption if self.telegram_file_id is not None: result["telegram_file_id"] = self.telegram_file_id + width = from_optional_int(self.width) + height = from_optional_int(self.height) + duration = from_optional_int(self.duration) + if width is not None: + result["width"] = width + if height is not None: + result["height"] = height + if duration is not None: + result["duration"] = duration return result @@ -115,7 +138,9 @@ def to_dict(self) -> dict: timestamp = from_optional_int(getattr(self, "timestamp", None)) message_type = getattr(self, "message_type", None) message_type_value = ( - message_type.value if isinstance(message_type, MessageType) else message_type + message_type.value + if isinstance(message_type, MessageType) + else message_type ) result: dict = { "url": from_str(getattr(self, "url", "")), diff --git a/packages/shared/fastfetchbot_shared/utils/number.py b/packages/shared/fastfetchbot_shared/utils/number.py new file mode 100644 index 0000000..f25cfe5 --- /dev/null +++ b/packages/shared/fastfetchbot_shared/utils/number.py @@ -0,0 +1,8 @@ +def positive_int(value) -> int | None: + if value is None or isinstance(value, bool): + return None + try: + number = int(round(float(value))) + except (TypeError, ValueError): + return None + return number if number > 0 else None diff --git a/tests/unit/async_worker/test_file_id_consumer.py b/tests/unit/async_worker/test_file_id_consumer.py index 37cf073..adb9eae 100644 --- a/tests/unit/async_worker/test_file_id_consumer.py +++ b/tests/unit/async_worker/test_file_id_consumer.py @@ -6,7 +6,6 @@ import pytest - # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @@ -113,20 +112,70 @@ async def test_updates_matching_media_file(self): MockMetadata.find = MagicMock(return_value=mock_query) MockMetadata.url = "url" - await _process_file_id_update({ - "metadata_url": "https://example.com/post/1", - "file_id_updates": [ - { - "url": "https://img.com/1.jpg", - "media_type": "image", - "telegram_file_id": "AgACAgI123", - }, - ], - }) + await _process_file_id_update( + { + "metadata_url": "https://example.com/post/1", + "file_id_updates": [ + { + "url": "https://img.com/1.jpg", + "media_type": "image", + "telegram_file_id": "AgACAgI123", + }, + ], + } + ) assert mock_mf.telegram_file_id == "AgACAgI123" mock_doc.save.assert_awaited_once() + @pytest.mark.asyncio + async def test_updates_video_dimensions_with_file_id(self): + from async_worker.services.file_id_consumer import _process_file_id_update + + mock_mf = MagicMock() + mock_mf.url = "https://vid.com/v.mp4" + mock_mf.telegram_file_id = None + mock_mf.width = None + mock_mf.height = None + mock_mf.duration = None + + mock_doc = MagicMock() + mock_doc.media_files = [mock_mf] + mock_doc.save = AsyncMock() + + mock_query = MagicMock() + mock_query.sort = MagicMock(return_value=mock_query) + mock_query.limit = MagicMock(return_value=mock_query) + mock_query.first_or_none = AsyncMock(return_value=mock_doc) + + with patch( + "fastfetchbot_shared.database.mongodb.models.metadata.Metadata" + ) as MockMetadata: + MockMetadata.find = MagicMock(return_value=mock_query) + MockMetadata.url = "url" + + await _process_file_id_update( + { + "metadata_url": "https://example.com/post/1", + "file_id_updates": [ + { + "url": "https://vid.com/v.mp4", + "media_type": "video", + "telegram_file_id": "BAACAgI456", + "width": 720, + "height": 1280, + "duration": 14, + }, + ], + } + ) + + assert mock_mf.telegram_file_id == "BAACAgI456" + assert mock_mf.width == 720 + assert mock_mf.height == 1280 + assert mock_mf.duration == 14 + mock_doc.save.assert_awaited_once() + @pytest.mark.asyncio async def test_skips_already_set_file_id(self): from async_worker.services.file_id_consumer import _process_file_id_update @@ -150,16 +199,18 @@ async def test_skips_already_set_file_id(self): MockMetadata.find = MagicMock(return_value=mock_query) MockMetadata.url = "url" - await _process_file_id_update({ - "metadata_url": "https://example.com/post/1", - "file_id_updates": [ - { - "url": "https://img.com/1.jpg", - "media_type": "image", - "telegram_file_id": "new_id", - }, - ], - }) + await _process_file_id_update( + { + "metadata_url": "https://example.com/post/1", + "file_id_updates": [ + { + "url": "https://img.com/1.jpg", + "media_type": "image", + "telegram_file_id": "new_id", + }, + ], + } + ) # Should not overwrite existing file_id assert mock_mf.telegram_file_id == "existing_id" @@ -189,16 +240,18 @@ async def test_no_save_when_no_match(self): MockMetadata.find = MagicMock(return_value=mock_query) MockMetadata.url = "url" - await _process_file_id_update({ - "metadata_url": "https://example.com/post/1", - "file_id_updates": [ - { - "url": "https://img.com/1.jpg", - "media_type": "image", - "telegram_file_id": "AgACAgI123", - }, - ], - }) + await _process_file_id_update( + { + "metadata_url": "https://example.com/post/1", + "file_id_updates": [ + { + "url": "https://img.com/1.jpg", + "media_type": "image", + "telegram_file_id": "AgACAgI123", + }, + ], + } + ) mock_doc.save.assert_not_awaited() @@ -218,16 +271,18 @@ async def test_handles_no_document_found(self): MockMetadata.url = "url" # Should not raise - await _process_file_id_update({ - "metadata_url": "https://example.com/missing", - "file_id_updates": [ - { - "url": "https://img.com/1.jpg", - "media_type": "image", - "telegram_file_id": "AgACAgI123", - }, - ], - }) + await _process_file_id_update( + { + "metadata_url": "https://example.com/missing", + "file_id_updates": [ + { + "url": "https://img.com/1.jpg", + "media_type": "image", + "telegram_file_id": "AgACAgI123", + }, + ], + } + ) @pytest.mark.asyncio async def test_handles_empty_payload(self): @@ -264,21 +319,23 @@ async def test_updates_multiple_media_files(self): MockMetadata.find = MagicMock(return_value=mock_query) MockMetadata.url = "url" - await _process_file_id_update({ - "metadata_url": "https://example.com/post/1", - "file_id_updates": [ - { - "url": "https://img.com/1.jpg", - "media_type": "image", - "telegram_file_id": "photo_id", - }, - { - "url": "https://vid.com/v.mp4", - "media_type": "video", - "telegram_file_id": "video_id", - }, - ], - }) + await _process_file_id_update( + { + "metadata_url": "https://example.com/post/1", + "file_id_updates": [ + { + "url": "https://img.com/1.jpg", + "media_type": "image", + "telegram_file_id": "photo_id", + }, + { + "url": "https://vid.com/v.mp4", + "media_type": "video", + "telegram_file_id": "video_id", + }, + ], + } + ) assert mock_mf1.telegram_file_id == "photo_id" assert mock_mf2.telegram_file_id == "video_id" @@ -305,13 +362,16 @@ async def brpop_side_effect(*args, **kwargs): mock_redis.brpop = AsyncMock(side_effect=brpop_side_effect) - with patch( - "async_worker.services.file_id_consumer.aioredis.from_url", - return_value=mock_redis, - ), patch( - "async_worker.services.file_id_consumer._process_file_id_update", - new_callable=AsyncMock, - ) as mock_process: + with ( + patch( + "async_worker.services.file_id_consumer.aioredis.from_url", + return_value=mock_redis, + ), + patch( + "async_worker.services.file_id_consumer._process_file_id_update", + new_callable=AsyncMock, + ) as mock_process, + ): from async_worker.services.file_id_consumer import _consume_loop await _consume_loop() @@ -362,13 +422,16 @@ async def brpop_side_effect(*args, **kwargs): mock_redis.brpop = AsyncMock(side_effect=brpop_side_effect) - with patch( - "async_worker.services.file_id_consumer.aioredis.from_url", - return_value=mock_redis, - ), patch( - "async_worker.services.file_id_consumer._process_file_id_update", - new_callable=AsyncMock, - side_effect=RuntimeError("MongoDB down"), + with ( + patch( + "async_worker.services.file_id_consumer.aioredis.from_url", + return_value=mock_redis, + ), + patch( + "async_worker.services.file_id_consumer._process_file_id_update", + new_callable=AsyncMock, + side_effect=RuntimeError("MongoDB down"), + ), ): from async_worker.services.file_id_consumer import _consume_loop @@ -399,13 +462,16 @@ async def brpop_side_effect(*args, **kwargs): mock_redis.brpop = AsyncMock(side_effect=brpop_side_effect) - with patch( - "async_worker.services.file_id_consumer.aioredis.from_url", - return_value=mock_redis, - ), patch( - "async_worker.services.file_id_consumer._process_file_id_update", - new_callable=AsyncMock, - side_effect=RuntimeError("still failing"), + with ( + patch( + "async_worker.services.file_id_consumer.aioredis.from_url", + return_value=mock_redis, + ), + patch( + "async_worker.services.file_id_consumer._process_file_id_update", + new_callable=AsyncMock, + side_effect=RuntimeError("still failing"), + ), ): from async_worker.services.file_id_consumer import _consume_loop @@ -430,13 +496,16 @@ async def brpop_side_effect(*args, **kwargs): async def process_side_effect(p): raise asyncio.CancelledError() - with patch( - "async_worker.services.file_id_consumer.aioredis.from_url", - return_value=mock_redis, - ), patch( - "async_worker.services.file_id_consumer._process_file_id_update", - new_callable=AsyncMock, - side_effect=process_side_effect, + with ( + patch( + "async_worker.services.file_id_consumer.aioredis.from_url", + return_value=mock_redis, + ), + patch( + "async_worker.services.file_id_consumer._process_file_id_update", + new_callable=AsyncMock, + side_effect=process_side_effect, + ), ): from async_worker.services.file_id_consumer import _consume_loop @@ -482,7 +551,9 @@ async def test_start_creates_task(self, mock_redis): from async_worker.services import file_id_consumer as fic with patch.object( - fic, "_consume_loop", new_callable=AsyncMock, + fic, + "_consume_loop", + new_callable=AsyncMock, ): await fic.start() assert fic._consumer_task is not None diff --git a/tests/unit/database/mongodb/test_file_id_fields.py b/tests/unit/database/mongodb/test_file_id_fields.py index 41d015e..215b122 100644 --- a/tests/unit/database/mongodb/test_file_id_fields.py +++ b/tests/unit/database/mongodb/test_file_id_fields.py @@ -15,7 +15,6 @@ Metadata, ) - # --------------------------------------------------------------------------- # MediaFile.telegram_file_id # --------------------------------------------------------------------------- @@ -73,6 +72,20 @@ def test_round_trip_with_file_id(self): assert restored.media_type == "video" assert restored.url == "https://vid.com/v.mp4" + def test_round_trip_with_video_dimensions(self): + original = MediaFile( + media_type="video", + url="https://vid.com/v.mp4", + telegram_file_id="BAACAgI456", + width=720, + height=1280, + duration=14, + ) + restored = MediaFile.from_dict(original.to_dict()) + assert restored.width == 720 + assert restored.height == 1280 + assert restored.duration == 14 + def test_round_trip_without_file_id(self): original = MediaFile(media_type="image", url="https://img.com/1.jpg") restored = MediaFile.from_dict(original.to_dict()) diff --git a/tests/unit/telegram_bot/test_file_id_capture.py b/tests/unit/telegram_bot/test_file_id_capture.py index bc6bfdb..b7d81b9 100644 --- a/tests/unit/telegram_bot/test_file_id_capture.py +++ b/tests/unit/telegram_bot/test_file_id_capture.py @@ -6,7 +6,6 @@ import pytest - # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @@ -183,6 +182,45 @@ async def test_pushes_file_id_updates(self, mock_redis): assert payload["file_id_updates"][0]["telegram_file_id"] == "AgACAgI123" assert payload["file_id_updates"][1]["telegram_file_id"] == "BAACAgI456" + @pytest.mark.asyncio + async def test_preserves_video_dimensions_in_file_id_update(self, mock_redis): + from core.services.file_id_capture import capture_and_push_file_ids + + msg = MagicMock() + msg.video = MagicMock( + file_id="BAACAgI456", + width=720, + height=1280, + duration=14, + ) + + uncached_info = [ + { + "url": "https://vid.com/v.mp4", + "media_type": "video", + "width": 720, + "height": 1280, + "duration": 14, + }, + ] + + with patch( + "core.services.file_id_capture.aioredis.from_url", + return_value=mock_redis, + ): + await capture_and_push_file_ids( + uncached_info=uncached_info, + sent_messages=(msg,), + metadata_url="https://example.com/post/1", + ) + + payload = json.loads(mock_redis.lpush.call_args[0][1]) + update = payload["file_id_updates"][0] + assert update["telegram_file_id"] == "BAACAgI456" + assert update["width"] == 720 + assert update["height"] == 1280 + assert update["duration"] == 14 + @pytest.mark.asyncio async def test_skips_none_entries(self, mock_redis): """None entries in uncached_info are cached items — skip them.""" @@ -254,7 +292,10 @@ async def test_handles_more_uncached_than_messages(self, mock_redis): uncached_info = [ {"url": "https://img.com/1.jpg", "media_type": "image"}, - {"url": "https://img.com/2.jpg", "media_type": "image"}, # no message for this + { + "url": "https://img.com/2.jpg", + "media_type": "image", + }, # no message for this ] with patch( diff --git a/tests/unit/telegram_bot/test_message_sender.py b/tests/unit/telegram_bot/test_message_sender.py index 4f1b321..abecaca 100644 --- a/tests/unit/telegram_bot/test_message_sender.py +++ b/tests/unit/telegram_bot/test_message_sender.py @@ -17,7 +17,11 @@ async def test_http_download_failure_skips_media(self): from core.services.message_sender import media_files_packaging media_files = [ - {"media_type": "image", "url": "https://example.com/img.jpg", "caption": ""}, + { + "media_type": "image", + "url": "https://example.com/img.jpg", + "caption": "", + }, ] data = { "url": "https://example.com", @@ -30,7 +34,9 @@ async def test_http_download_failure_skips_media(self): new_callable=AsyncMock, side_effect=RuntimeError("network error"), ): - media_group, file_group, uncached = await media_files_packaging(media_files, data) + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) # Media item should be skipped, not crash assert media_group == [] @@ -54,7 +60,9 @@ async def test_gif_download_failure_skips_media(self): new_callable=AsyncMock, side_effect=ConnectionError("timeout"), ): - media_group, file_group, uncached = await media_files_packaging(media_files, data) + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) assert media_group == [] assert file_group == [] @@ -66,7 +74,9 @@ async def test_image_document_download_failure_skips(self): mock_io = MagicMock() mock_io.name = "media-abc.jpg" - mock_io.size = 15 * 1024 * 1024 # 15MB, over image size limit to trigger document path + mock_io.size = ( + 15 * 1024 * 1024 + ) # 15MB, over image size limit to trigger document path mock_image = MagicMock() mock_image.size = (5000, 5000) # large image to trigger document download @@ -82,7 +92,11 @@ async def download_side_effect(*args, **kwargs): raise RuntimeError("second download failed") media_files = [ - {"media_type": "image", "url": "https://example.com/big.jpg", "caption": ""}, + { + "media_type": "image", + "url": "https://example.com/big.jpg", + "caption": "", + }, ] data = { "url": "https://example.com", @@ -90,28 +104,32 @@ async def download_side_effect(*args, **kwargs): "message_type": "short", } - with patch( - "core.services.message_sender.download_file_by_metadata_item", - new_callable=AsyncMock, - side_effect=download_side_effect, - ), patch( - "core.services.message_sender.check_image_type", - new_callable=AsyncMock, - return_value="jpg", - ), patch( - "core.services.message_sender.Image" - ) as MockImage, patch( - "core.services.message_sender.image_compressing", - return_value=mock_image, - ), patch( - "core.services.message_sender.settings" - ) as mock_settings: + with ( + patch( + "core.services.message_sender.download_file_by_metadata_item", + new_callable=AsyncMock, + side_effect=download_side_effect, + ), + patch( + "core.services.message_sender.check_image_type", + new_callable=AsyncMock, + return_value="jpg", + ), + patch("core.services.message_sender.Image") as MockImage, + patch( + "core.services.message_sender.image_compressing", + return_value=mock_image, + ), + patch("core.services.message_sender.settings") as mock_settings, + ): MockImage.open.return_value = mock_image mock_settings.TELEBOT_API_SERVER = None mock_settings.TELEGRAM_IMAGE_DIMENSION_LIMIT = 2000 mock_settings.TELEGRAM_IMAGE_SIZE_LIMIT = 10 * 1024 * 1024 # 10MB - media_group, file_group, uncached = await media_files_packaging(media_files, data) + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) # The image media_group should have the photo, but file_group should be empty # because the document download failed and was skipped @@ -138,9 +156,15 @@ async def test_uses_file_id_for_image(self): "telegram_file_id": "AgACAgI123", }, ] - data = {"url": "https://example.com", "category": "twitter", "message_type": "short"} + data = { + "url": "https://example.com", + "category": "twitter", + "message_type": "short", + } - media_group, file_group, uncached = await media_files_packaging(media_files, data) + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) assert len(media_group) == 1 assert len(media_group[0]) == 1 @@ -158,14 +182,196 @@ async def test_uses_file_id_for_video(self): "telegram_file_id": "BAACAgI456", }, ] - data = {"url": "https://example.com", "category": "twitter", "message_type": "short"} + data = { + "url": "https://example.com", + "category": "twitter", + "message_type": "short", + } - media_group, file_group, uncached = await media_files_packaging(media_files, data) + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) assert len(media_group) == 1 assert len(media_group[0]) == 1 assert uncached == [None] + @pytest.mark.asyncio + async def test_uses_video_dimensions_with_file_id(self): + from core.services.message_sender import media_files_packaging + + media_files = [ + { + "media_type": "video", + "url": "https://vid.com/v.mp4", + "telegram_file_id": "BAACAgI456", + "width": 720, + "height": 1280, + "duration": 14, + }, + ] + data = { + "url": "https://example.com", + "category": "twitter", + "message_type": "short", + } + + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) + + video = media_group[0][0] + assert video.media == "BAACAgI456" + assert video.width == 720 + assert video.height == 1280 + assert video.duration == 14 + assert uncached == [None] + + @pytest.mark.asyncio + async def test_uses_video_dimensions_from_media_item_for_download(self): + from core.services.message_sender import media_files_packaging + + mock_io = MagicMock() + mock_io.name = "video.mp4" + mock_io.size = 1024 + + media_files = [ + { + "media_type": "video", + "url": "https://vid.com/v.mp4", + "width": 720, + "height": 1280, + "duration": 14, + }, + ] + data = { + "url": "https://example.com", + "category": "twitter", + "message_type": "short", + } + + with ( + patch( + "core.services.message_sender.download_file_by_metadata_item", + new_callable=AsyncMock, + return_value=mock_io, + ), + patch("core.services.message_sender.settings") as mock_settings, + ): + mock_settings.TELEBOT_API_SERVER = "http://local:8081/bot" + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) + + video = media_group[0][0] + assert video.width == 720 + assert video.height == 1280 + assert video.duration == 14 + assert uncached == [ + { + "url": "https://vid.com/v.mp4", + "media_type": "video", + "width": 720, + "height": 1280, + "duration": 14, + }, + ] + + @pytest.mark.asyncio + async def test_probes_video_dimensions_for_download_when_missing(self): + from core.services.message_sender import media_files_packaging + + mock_io = MagicMock() + mock_io.name = "video.mp4" + mock_io.size = 1024 + + media_files = [ + { + "media_type": "video", + "url": "https://vid.com/v.mp4", + }, + ] + data = { + "url": "https://example.com", + "category": "twitter", + "message_type": "short", + } + + with ( + patch( + "core.services.message_sender.download_file_by_metadata_item", + new_callable=AsyncMock, + return_value=mock_io, + ), + patch( + "core.services.video_metadata.probe_video_metadata", + new_callable=AsyncMock, + return_value={"width": 720, "height": 1280, "duration": 14}, + create=True, + ) as mock_probe, + patch("core.services.message_sender.settings") as mock_settings, + ): + mock_settings.TELEBOT_API_SERVER = "http://local:8081/bot" + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) + + video = media_group[0][0] + assert video.width == 720 + assert video.height == 1280 + assert video.duration == 14 + assert uncached[0]["width"] == 720 + assert uncached[0]["height"] == 1280 + assert uncached[0]["duration"] == 14 + mock_probe.assert_awaited_once_with(mock_io) + + @pytest.mark.asyncio + async def test_probes_video_duration_when_dimensions_already_present(self): + from core.services.message_sender import media_files_packaging + + mock_io = MagicMock() + mock_io.name = "video.mp4" + mock_io.size = 1024 + + media_files = [ + { + "media_type": "video", + "url": "https://vid.com/v.mp4", + "width": 720, + "height": 1280, + }, + ] + data = { + "url": "https://example.com", + "category": "twitter", + "message_type": "short", + } + + with ( + patch( + "core.services.message_sender.download_file_by_metadata_item", + new_callable=AsyncMock, + return_value=mock_io, + ), + patch( + "core.services.video_metadata.probe_video_metadata", + new_callable=AsyncMock, + return_value={"duration": 14}, + ) as mock_probe, + patch("core.services.message_sender.settings") as mock_settings, + ): + mock_settings.TELEBOT_API_SERVER = "http://local:8081/bot" + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) + + video = media_group[0][0] + assert video.width == 720 + assert video.height == 1280 + assert video.duration == 14 + assert uncached[0]["duration"] == 14 + mock_probe.assert_awaited_once_with(mock_io) + @pytest.mark.asyncio async def test_uses_file_id_for_gif(self): from core.services.message_sender import media_files_packaging @@ -177,9 +383,15 @@ async def test_uses_file_id_for_gif(self): "telegram_file_id": "CgACAgI789", }, ] - data = {"url": "https://example.com", "category": "twitter", "message_type": "short"} + data = { + "url": "https://example.com", + "category": "twitter", + "message_type": "short", + } - media_group, file_group, uncached = await media_files_packaging(media_files, data) + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) assert len(media_group) == 1 assert uncached == [None] @@ -195,9 +407,15 @@ async def test_uses_file_id_for_document(self): "telegram_file_id": "BQACAgI000", }, ] - data = {"url": "https://example.com", "category": "twitter", "message_type": "short"} + data = { + "url": "https://example.com", + "category": "twitter", + "message_type": "short", + } - media_group, file_group, uncached = await media_files_packaging(media_files, data) + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) assert len(file_group) == 1 assert uncached == [None] @@ -214,7 +432,11 @@ async def test_no_download_when_file_id_present(self): "telegram_file_id": "AgACAgI123", }, ] - data = {"url": "https://example.com", "category": "twitter", "message_type": "short"} + data = { + "url": "https://example.com", + "category": "twitter", + "message_type": "short", + } with patch( "core.services.message_sender.download_file_by_metadata_item", @@ -239,17 +461,24 @@ async def test_uncached_info_for_downloaded_items(self): "url": "https://vid.com/v.mp4", }, ] - data = {"url": "https://example.com", "category": "twitter", "message_type": "short"} + data = { + "url": "https://example.com", + "category": "twitter", + "message_type": "short", + } - with patch( - "core.services.message_sender.download_file_by_metadata_item", - new_callable=AsyncMock, - return_value=mock_io, - ), patch( - "core.services.message_sender.settings" - ) as mock_settings: + with ( + patch( + "core.services.message_sender.download_file_by_metadata_item", + new_callable=AsyncMock, + return_value=mock_io, + ), + patch("core.services.message_sender.settings") as mock_settings, + ): mock_settings.TELEBOT_API_SERVER = "http://local:8081/bot" - media_group, file_group, uncached = await media_files_packaging(media_files, data) + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) assert len(uncached) == 1 assert uncached[0] is not None @@ -277,17 +506,24 @@ async def test_mixed_cached_and_uncached(self): # no telegram_file_id — will be downloaded }, ] - data = {"url": "https://example.com", "category": "twitter", "message_type": "short"} + data = { + "url": "https://example.com", + "category": "twitter", + "message_type": "short", + } - with patch( - "core.services.message_sender.download_file_by_metadata_item", - new_callable=AsyncMock, - return_value=mock_io, - ), patch( - "core.services.message_sender.settings" - ) as mock_settings: + with ( + patch( + "core.services.message_sender.download_file_by_metadata_item", + new_callable=AsyncMock, + return_value=mock_io, + ), + patch("core.services.message_sender.settings") as mock_settings, + ): mock_settings.TELEBOT_API_SERVER = "http://local:8081/bot" - media_group, file_group, uncached = await media_files_packaging(media_files, data) + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) assert len(media_group) == 1 # one group with 2 items assert len(media_group[0]) == 2 @@ -308,15 +544,66 @@ async def test_file_id_skips_even_for_long_messages(self): "telegram_file_id": "AgACAgI123", }, ] - data = {"url": "https://example.com", "category": "twitter", "message_type": "long"} + data = { + "url": "https://example.com", + "category": "twitter", + "message_type": "long", + } - media_group, file_group, uncached = await media_files_packaging(media_files, data) + media_group, file_group, uncached = await media_files_packaging( + media_files, data + ) # Should still use file_id even though message_type is "long" assert len(media_group) == 1 assert uncached == [None] +class TestProbeVideoMetadata: + @pytest.mark.asyncio + async def test_offloads_temp_file_write_to_thread(self): + from core.services.video_metadata import probe_video_metadata + + mock_io = MagicMock() + mock_io.name = "video.mp4" + mock_io.tell.return_value = 0 + + temp_file = MagicMock() + temp_file.name = "/tmp/video.mp4" + temp_file.__enter__.return_value = temp_file + temp_file.__exit__.return_value = None + + process = MagicMock() + process.returncode = 0 + process.communicate = AsyncMock( + return_value=(b'{"streams":[{"width":720,"height":1280}]}', b"") + ) + + with ( + patch( + "core.services.video_metadata.shutil.which", + return_value="/usr/bin/ffprobe", + ), + patch( + "core.services.video_metadata.tempfile.NamedTemporaryFile", + return_value=temp_file, + ), + patch( + "core.services.video_metadata.asyncio.to_thread", + new_callable=AsyncMock, + ) as mock_to_thread, + patch( + "core.services.video_metadata.asyncio.create_subprocess_exec", + new_callable=AsyncMock, + return_value=process, + ), + ): + metadata = await probe_video_metadata(mock_io) + + assert metadata == {"width": 720, "height": 1280} + mock_to_thread.assert_awaited_once() + + class TestSendItemMessageExceptionHandling: @pytest.mark.asyncio async def test_exception_logged_and_sent_to_debug_channel(self): @@ -332,8 +619,15 @@ async def test_exception_logged_and_sent_to_debug_channel(self): mock_bot.get_chat = AsyncMock(return_value=mock_chat) mock_bot.send_message = AsyncMock(side_effect=RuntimeError("telegram API down")) - with patch("core.services.message_sender._get_application", return_value=mock_app), \ - patch("core.services.message_sender.send_debug_channel", new_callable=AsyncMock) as mock_debug: + with ( + patch( + "core.services.message_sender._get_application", return_value=mock_app + ), + patch( + "core.services.message_sender.send_debug_channel", + new_callable=AsyncMock, + ) as mock_debug, + ): # Should not raise — the exception is caught and logged await send_item_message( diff --git a/tests/unit/test_number.py b/tests/unit/test_number.py new file mode 100644 index 0000000..7e5368e --- /dev/null +++ b/tests/unit/test_number.py @@ -0,0 +1,17 @@ +from fastfetchbot_shared.utils.number import positive_int + + +def test_positive_int_accepts_positive_ints_and_numeric_strings(): + assert positive_int(12) == 12 + assert positive_int("12") == 12 + assert positive_int("12.4") == 12 + assert positive_int("12.5") == 12 + + +def test_positive_int_rejects_non_positive_bool_and_invalid_values(): + assert positive_int(0) is None + assert positive_int(-1) is None + assert positive_int(True) is None + assert positive_int(False) is None + assert positive_int(None) is None + assert positive_int("not-a-number") is None