221 lines
7.2 KiB
Python
221 lines
7.2 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
import uuid
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Dict, List
|
|
|
|
import requests
|
|
from PIL import Image
|
|
|
|
from ..db import db_session
|
|
from ..models import Project, SourceClip
|
|
from . import storage
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def resolve_media_urls_from_input(url: str) -> List[str]:
    """Resolve direct media URLs for a given user-provided URL using yt-dlp.

    Runs ``yt-dlp --dump-json --skip-download`` and collects one URL per JSON
    object printed on stdout: the object's ``url`` field, or ``webpage_url``
    when ``url`` is missing or empty.

    Args:
        url: The URL supplied by the user.

    Returns:
        The collected URLs in the order yt-dlp printed them. Falls back to
        ``[url]`` when yt-dlp is not installed, exits with a non-zero status,
        or prints no usable entry.
    """
    command = [
        "yt-dlp",
        "--dump-json",
        "--skip-download",
        # End-of-options marker: a user-supplied value that starts with "-"
        # must be treated as a URL, never parsed as a yt-dlp flag.
        "--",
        url,
    ]
    try:
        process = subprocess.run(
            command,
            check=False,
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        logger.warning("yt-dlp not installed; returning provided URL directly")
        return [url]

    if process.returncode != 0:
        logger.error("yt-dlp failed for %s: %s", url, process.stderr.strip())
        return [url]

    media_urls: List[str] = []
    for line in process.stdout.splitlines():
        try:
            payload = json.loads(line)
        except json.JSONDecodeError:
            # Blank lines and non-JSON chatter are ignored.
            continue
        if not isinstance(payload, dict):
            # Valid JSON that is not an object (null, number, list) has no
            # fields to read; skip it instead of crashing on ``.get``.
            continue
        # NOTE(review): ``webpage_url`` looks like the page address rather
        # than a direct media link - confirm the fallback is intended.
        url_field = payload.get("url") or payload.get("webpage_url")
        if url_field:
            media_urls.append(url_field)

    return media_urls or [url]
|
|
|
|
|
|
def download_media_file(media_url: str) -> str:
    """Download a media file to a temporary local path and return it.

    The response body is streamed in 8 KiB chunks into a
    ``tempfile.NamedTemporaryFile`` created with ``delete=False``, so the
    caller owns the returned file and is responsible for removing it.

    Args:
        media_url: URL fetched with an HTTP GET (60 second timeout).

    Returns:
        Path of the temporary file holding the downloaded bytes. Its suffix
        is the extension of the URL's path component, or ``.mp4`` when the
        path has none.

    Raises:
        Exception: Whatever ``requests.get``, ``raise_for_status`` or the
            streaming read raise; a partially written temp file is removed
            before the error propagates.
    """
    from urllib.parse import urlparse

    # Use only the URL *path* for the extension; otherwise a query string or
    # fragment ("clip.mp4?token=...") would leak into the file suffix.
    suffix = Path(urlparse(media_url).path).suffix or ".mp4"

    # The context manager releases the connection even if streaming fails.
    with requests.get(media_url, stream=True, timeout=60) as response:
        response.raise_for_status()

        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        try:
            with temp_file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        temp_file.write(chunk)
        except BaseException:
            # Do not leave a truncated download behind; the file is already
            # closed here because the inner ``with`` has exited.
            if os.path.exists(temp_file.name):
                os.remove(temp_file.name)
            raise

    return temp_file.name
|
|
|
|
|
|
def extract_video_metadata(local_path: str) -> Dict[str, float | int | None]:
    """Extract video metadata using ffprobe.

    Asks ffprobe for the width, height and ``r_frame_rate`` of the first
    video stream plus the container duration, as JSON.

    Args:
        local_path: Path of the media file handed to ffprobe.

    Returns:
        A dict with the keys ``duration_seconds``, ``width``, ``height`` and
        ``fps``. ``duration_seconds`` is ``None`` when ffprobe reports no
        duration; ``fps`` is ``None`` when the frame rate cannot be parsed or
        has a zero denominator.

    Raises:
        RuntimeError: If ffprobe exits with a non-zero status.
    """
    probe = subprocess.run(
        [
            "ffprobe",
            "-v", "error",
            "-select_streams", "v:0",
            "-show_entries", "stream=width,height,r_frame_rate:format=duration",
            "-of", "json",
            local_path,
        ],
        capture_output=True,
        text=True,
        check=False,
    )
    if probe.returncode != 0:
        raise RuntimeError(f"ffprobe failed: {probe.stderr}")

    report = json.loads(probe.stdout)
    # An absent or empty stream list is treated as one stream with no fields.
    video_streams = report.get("streams") or [{}]
    video = video_streams[0]
    container = report.get("format") or {}

    # r_frame_rate arrives as a "numerator/denominator" string.
    rate_text = video.get("r_frame_rate", "0/1")
    try:
        numerator, denominator = rate_text.split("/")
        divisor = float(denominator)
        if divisor:
            frame_rate = float(numerator) / divisor
        else:
            frame_rate = None
    except (ValueError, ZeroDivisionError):
        frame_rate = None

    raw_duration = container.get("duration")
    if raw_duration:
        duration = float(raw_duration)
    else:
        duration = None

    return {
        "duration_seconds": duration,
        "width": video.get("width"),
        "height": video.get("height"),
        "fps": frame_rate,
    }
|
|
|
|
|
|
def generate_thumbnail(local_video_path: str, time_seconds: float = 0.5) -> bytes:
    """Generate a thumbnail image for a video clip using ffmpeg.

    Captures a single frame at ``time_seconds`` with ffmpeg, scales it to a
    width of 480 pixels while keeping the aspect ratio, and returns the
    result encoded as JPEG (quality 90).

    Args:
        local_video_path: Path of the video file ffmpeg reads.
        time_seconds: Seek position, in seconds, of the frame to capture.

    Returns:
        JPEG-encoded bytes of the resized frame.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status or leaves the
            output file empty (no frame was written).
    """
    import io

    thumbnail_width = 480

    # Reserve a unique output path. This creates the file, so ffmpeg must be
    # told to overwrite it (see "-y" below).
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_image:
        temp_image_path = temp_image.name

    try:
        command = [
            "ffmpeg",
            # Never wait on stdin, and overwrite the pre-created output file
            # instead of prompting or refusing because it already exists.
            "-nostdin",
            "-y",
            "-ss",
            str(time_seconds),
            "-i",
            local_video_path,
            "-frames:v",
            "1",
            "-q:v",
            "2",
            temp_image_path,
        ]
        result = subprocess.run(command, capture_output=True, check=False)
        if result.returncode != 0:
            # stderr is captured as bytes; decode so the message is readable.
            stderr_text = result.stderr.decode("utf-8", errors="replace")
            raise RuntimeError(f"ffmpeg thumbnail generation failed: {stderr_text}")
        if not os.path.exists(temp_image_path) or os.path.getsize(temp_image_path) == 0:
            raise RuntimeError("Failed to create thumbnail image")

        with Image.open(temp_image_path) as img:
            ratio = thumbnail_width / float(img.width)
            # Clamp to one pixel so extremely wide frames stay resizable.
            target_height = max(1, int(img.height * ratio))
            resized = img.resize((thumbnail_width, target_height))
            # Encode in memory; no second temp file is needed just to read
            # the bytes back.
            buffer = io.BytesIO()
            resized.save(buffer, format="JPEG", quality=90)
        return buffer.getvalue()
    finally:
        if os.path.exists(temp_image_path):
            os.remove(temp_image_path)
|
|
|
|
|
|
def ingest_single_media_url(project_id: str, input_url: str, origin: str = "url") -> List[Dict]:
    """Ingest media from a URL and persist SourceClip entries.

    The input URL is resolved to one or more media URLs. Each one is
    downloaded, probed for metadata, given a thumbnail, uploaded through
    ``storage`` and recorded as a ``SourceClip`` row. The downloaded temp
    file is removed after each item whether or not processing succeeded.

    Args:
        project_id: Identifier of the project the clips belong to.
        input_url: URL provided by the user; stored as ``original_url``.
        origin: Value stored in each clip's ``origin`` field.

    Returns:
        One dict per created clip, read back from the flushed row, with the
        keys ``id``, ``project_id``, ``storage_path``, ``thumbnail_path``,
        ``duration_seconds``, ``width``, ``height``, ``fps``, ``origin`` and
        ``original_url``.

    Raises:
        ValueError: If no project with ``project_id`` exists.
    """
    # Attributes copied from each flushed clip into its summary, in order.
    summary_fields = (
        "id",
        "project_id",
        "storage_path",
        "thumbnail_path",
        "duration_seconds",
        "width",
        "height",
        "fps",
        "origin",
        "original_url",
    )

    resolved_urls = resolve_media_urls_from_input(input_url)
    summaries: List[Dict] = []

    with db_session() as session:
        owner = session.query(Project).filter_by(id=project_id).one_or_none()
        if owner is None:
            raise ValueError(f"Project {project_id} does not exist")

        for resolved_url in resolved_urls:
            downloaded_path = download_media_file(resolved_url)
            try:
                probe = extract_video_metadata(downloaded_path)
                thumbnail_jpeg = generate_thumbnail(downloaded_path)

                clip_id = str(uuid.uuid4())
                file_suffix = Path(downloaded_path).suffix or ".mp4"
                video_key = f"videos/{project_id}/{clip_id}{file_suffix}"
                thumbnail_key = f"thumbnails/{project_id}/{clip_id}.jpg"

                with open(downloaded_path, "rb") as video_file:
                    stored_video = storage.upload_file(video_file, video_key)
                stored_thumbnail = storage.upload_bytes(
                    thumbnail_jpeg,
                    thumbnail_key,
                    content_type="image/jpeg",
                )

                timestamp = datetime.utcnow()
                record = SourceClip(
                    id=clip_id,
                    project_id=project_id,
                    origin=origin,
                    original_url=input_url,
                    storage_path=stored_video,
                    thumbnail_path=stored_thumbnail,
                    duration_seconds=probe.get("duration_seconds"),
                    width=probe.get("width"),
                    height=probe.get("height"),
                    fps=probe.get("fps"),
                    created_at=timestamp,
                    updated_at=timestamp,
                )
                session.add(record)
                # Flush before reading the attributes back for the summary.
                session.flush()

                summaries.append(
                    {field: getattr(record, field) for field in summary_fields}
                )
            finally:
                # The local download is only a staging copy; always drop it.
                if os.path.exists(downloaded_path):
                    os.remove(downloaded_path)
        session.commit()

    return summaries
|