Commit cfc3f637 authored by Eliot Berriot's avatar Eliot Berriot

Added sync-podcasts.py

parent 8e8684bd
Collections of scripts to mirror public audio content from Youtube (or other Youtube-DL compatible platforms) and embed proper metadata.
Collection of scripts to mirror public audio content and embed proper metadata for upload to your Funkwhale pod.
## Download a playlist with Youtube-DL as ogg files
......@@ -22,3 +22,43 @@ python3 update-metadata.py *.ogg --album="My Album" --artist="My Artist" --stri
```
At this point, your files are tagged properly :)
## Mirror a podcast via an RSS feed
The `sync-podcasts.py` script mirrors one or more podcast feeds to your [Funkwhale](https://funkwhale.audio) pod by:
- Creating a Funkwhale library per podcast URL
- Downloading audio from podcast feed
- Uploading audio to the corresponding podcast library
### Preparation
```sh
pip3 install --user click aiohttp python-dotenv feedparser
echo "FUNKWHALE_SERVER_URL=https://demo.funkwhale.audio" >> .env
echo "FUNKWHALE_USERNAME=demo" >> .env
echo "FUNKWHALE_PASSWORD=demo" >> .env
echo "PODCAST_URLS=http://podcast1.url/feed.xml http://podcast2.url/feed.xml" >> .env
```
### Mirroring
Simply run:
```sh
./sync-podcasts.py .env
```
### Automation
The script is idempotent and will only mirror new entries. Thus, you can put the script in a crontab:
```sh
crontab -e
# Check for new entries every three hours
0 */3 * * * /home/youruser/sync-podcasts.py /home/youruser/.env
```
This diff is collapsed.
#!/usr/bin/env python3
import aiohttp
import asyncio
import os
import argparse
import feedparser
import tempfile
from dotenv import load_dotenv
async def _main(urls, server, username, password):
    """Mirror every podcast feed in ``urls`` to the Funkwhale server.

    The coroutine runs these steps in order:

    1. log in with ``username``/``password`` to obtain a JWT token;
    2. fetch and parse all feeds concurrently;
    3. find, or create, one library per feed URL (a library matches when
       its description contains the feed URL);
    4. ask ``check_feed`` which entries of each feed are new;
    5. download and upload the new entries, with at most five downloads
       and five uploads in flight at any time.

    Args:
        urls: Feed URLs to mirror. Repeated URLs are processed once.
        server: Base URL of the Funkwhale server, without trailing slash.
        username: Funkwhale account name.
        password: Funkwhale account password.
    """
    # Process each URL once. Libraries created below are not added to the
    # `libraries` list, so a repeated URL would otherwise be fetched twice
    # and end up with two libraries.
    urls = list(dict.fromkeys(urls))

    print("Getting Funkwhale auth token…")
    async with aiohttp.ClientSession(headers={}) as session:
        token = await login(
            session=session, server=server, username=username, password=password
        )
    print(" Funkwhale login successful!")

    # Feeds are public: fetch them with a session that carries no credentials.
    async with aiohttp.ClientSession(headers={}) as session:
        print("Fetching {} RSS feeds…".format(len(urls)))
        feeds = await asyncio.gather(*[get_feed(session, url) for url in urls])
    print(" Feeds fetched!")

    async with aiohttp.ClientSession(
        headers={"Authorization": "JWT {}".format(token)}
    ) as funkwhale_session:
        print("Retrieving existing libraries…")
        libraries = await get_libraries(funkwhale_session, server)
        print(" {} libraries found!".format(len(libraries)))

        feeds_by_url = {}
        for url, parsed_feed, _ in feeds:
            # The feed URL is stored in the library description when the
            # library is created, which is how it is recognised afterwards.
            matching_library = [
                lib
                for lib in libraries
                if lib["description"] and url in lib["description"]
            ]
            if matching_library:
                library = matching_library[0]
                print("Found matching library for podcast {}".format(url))
            else:
                print("Creating library for podcast {}".format(url))
                library = await create_library(
                    funkwhale_session,
                    server,
                    name=parsed_feed["feed"]["title"],
                    description="Mirrored from {}".format(url),
                )
                print(
                    "Created library {} for podcast {}".format(library["uuid"], url)
                )
            feeds_by_url[url] = {
                "url": url,
                "library": library,
                "parsed_feed": parsed_feed,
            }

        print("Checking for new entries…")
        checked_feeds = await asyncio.gather(
            *[
                check_feed(
                    funkwhale_session,
                    server,
                    url=f["url"],
                    library=f["library"],
                    parsed_feed=f["parsed_feed"],
                )
                for f in feeds_by_url.values()
            ]
        )
        feeds_with_new_entries = [
            (url, new_entries) for url, new_entries in checked_feeds if new_entries
        ]
        print(
            " Found {} feeds with new entries ({} new entries in total)!".format(
                len(feeds_with_new_entries),
                sum(len(new_entries) for _, new_entries in feeds_with_new_entries),
            )
        )

        print("Launching download of new entries…")
        # Separate limits so slow uploads do not hold back downloads.
        upload_semaphore = asyncio.Semaphore(5)
        download_semaphore = asyncio.Semaphore(5)
        async with aiohttp.ClientSession(headers={}) as download_session:
            tasks = [
                mirror_entry(
                    server=server,
                    download_session=download_session,
                    upload_session=funkwhale_session,
                    url=entry_url,
                    parsed_entry=parsed_entry,
                    library=feeds_by_url[url]["library"],
                    download_semaphore=download_semaphore,
                    upload_semaphore=upload_semaphore,
                )
                for url, entries in feeds_with_new_entries
                for entry_url, parsed_entry in entries
            ]
            await asyncio.gather(*tasks)
        print(" {} entries downloaded!".format(len(tasks)))
async def get_feed(session, url):
    """Download the feed at ``url`` and parse it with feedparser.

    Args:
        session: Client session used for the GET request.
        url: Address of the RSS feed.

    Returns:
        A ``(url, parsed_feed, True)`` tuple. The third element is always
        ``True``.

    Raises:
        Whatever ``raise_for_status()`` raises on an HTTP error status.
    """
    async with session.get(url) as response:
        response.raise_for_status()
        body = await response.text()
    print(' Fetched {}!'.format(url))
    parsed = feedparser.parse(body)
    return (url, parsed, True)
async def check_feed(session, server, url, library, parsed_feed):
    """Return the feed entries that are not yet uploaded to ``library``.

    The uploads endpoint is queried once for the given library, and the
    ``source`` of each returned upload is compared with the entry guids.
    Entries are walked in feed order and the walk stops at the first guid
    that is already uploaded.
    NOTE(review): this assumes the feed lists its newest entries first.

    Args:
        session: Authenticated client session for the Funkwhale API.
        server: Base URL of the Funkwhale server.
        url: Feed URL, returned unchanged so results can be matched up.
        library: Library payload; only ``library["uuid"]`` is read.
        parsed_feed: Parsed feed; only ``parsed_feed["entries"]`` is read.

    Returns:
        A ``(url, entries)`` tuple where ``entries`` is a list of
        ``(guid, entry)`` pairs.

    Raises:
        Whatever ``raise_for_status()`` raises on an HTTP error status.
    """
    async with session.get(
        server + "/api/v1/uploads/?library={}".format(library["uuid"])
    ) as r:
        r.raise_for_status()
        latest_uploads = await r.json()

    # A set gives constant-time membership checks in the loop below.
    known_sources = {u["source"] for u in latest_uploads["results"] if u["source"]}

    entries = []
    for entry in parsed_feed["entries"]:
        guid = entry.get("guid")
        if not guid:
            # Without a guid the upload could never be recognised on a
            # later run, so the entry would be mirrored again every time.
            print(" Skipping an entry without guid in {}".format(url))
            continue
        if guid in known_sources:
            break
        entries.append((guid, entry))
    return (url, entries)
async def get_libraries(session, server):
    """Collect every library returned by the paginated libraries endpoint.

    Starts at ``/api/v1/libraries/`` and follows the ``next`` link of each
    page until it is empty.

    Args:
        session: Authenticated client session for the Funkwhale API.
        server: Base URL of the Funkwhale server.

    Returns:
        A list with the ``results`` items of all pages, in page order.

    Raises:
        Whatever ``raise_for_status()`` raises on an HTTP error status.
    """
    url = server + "/api/v1/libraries/"
    libraries = []
    while url:
        async with session.get(url) as response:
            response.raise_for_status()
            page = await response.json()
        libraries.extend(page["results"])
        # A falsy ``next`` ends the loop.
        url = page["next"]
    return libraries
async def create_library(session, server, name, description):
    """Create a library on the Funkwhale server and return its payload.

    Args:
        session: Authenticated client session for the Funkwhale API.
        server: Base URL of the Funkwhale server.
        name: Name of the new library.
        description: Description of the new library.

    Returns:
        The decoded JSON body of the creation response.

    Raises:
        Whatever ``raise_for_status()`` raises on an HTTP error status.
    """
    endpoint = server + "/api/v1/libraries/"
    form = {"name": name, "description": description}
    async with session.post(endpoint, data=form) as response:
        response.raise_for_status()
        return await response.json()
async def login(session, server, username, password):
    """Exchange Funkwhale credentials for an authentication token.

    Args:
        session: Client session used for the request.
        server: Base URL of the Funkwhale server.
        username: Funkwhale account name.
        password: Funkwhale account password.

    Returns:
        The ``token`` field of the JSON response.

    Raises:
        Whatever ``raise_for_status()`` raises on an HTTP error status.
    """
    endpoint = server + "/api/v1/token/"
    credentials = {"username": username, "password": password}
    async with session.post(endpoint, data=credentials) as response:
        response.raise_for_status()
        body = await response.json()
    return body["token"]
async def mirror_entry(
    server,
    download_session,
    upload_session,
    url,
    parsed_entry,
    library,
    download_semaphore,
    upload_semaphore,
):
    """Download the audio of one feed entry and upload it to a library.

    The first link of the entry whose type starts with ``audio`` is
    downloaded fully into memory, then posted to ``/api/v1/uploads/``.
    Entries without such a link are skipped, and a rejected upload is
    reported and skipped, so one bad entry does not abort the others.

    Args:
        server: Base URL of the Funkwhale server.
        download_session: Client session used to fetch the audio file.
        upload_session: Authenticated session used for the upload.
        url: Identifier of the entry, stored as the upload ``source``.
        parsed_entry: Parsed feed entry; only its ``links`` are read.
        library: Library payload; only ``library["uuid"]`` is read.
        download_semaphore: Limits the number of concurrent downloads.
        upload_semaphore: Limits the number of concurrent uploads.

    Raises:
        Whatever ``raise_for_status()`` raises when the download fails.
    """
    audio_links = [
        link["href"]
        for link in parsed_entry.get("links", [])
        if (link.get("type") or "").startswith("audio")
    ]
    if not audio_links:
        # Nothing to mirror; skip rather than fail the whole batch.
        print(' No audio link found for {}, skipping'.format(url))
        return
    download_link = audio_links[0]

    async with download_semaphore:
        print(' Downloading {}…'.format(download_link))
        async with download_session.get(download_link) as download_r:
            download_r.raise_for_status()
            audio = await download_r.read()

    data = {
        "library": library['uuid'],
        "import_reference": 'sync-podcast',
        "source": url,
        "audio_file": audio,
    }
    print(' Uploading {}…'.format(download_link))
    async with upload_semaphore:
        async with upload_session.post(server + '/api/v1/uploads/', data=data) as upload_r:
            try:
                upload_r.raise_for_status()
            except aiohttp.ClientResponseError:
                # Only HTTP errors are handled here; cancellation and
                # interrupts propagate. The error body may not be JSON
                # (for example a proxy error page), so fall back to text.
                try:
                    result = await upload_r.json()
                except (aiohttp.ContentTypeError, ValueError):
                    result = await upload_r.text()
                print(' Failed to upload {}: {}'.format(url, result))
                return
            result = await upload_r.json()
    print(' Uploaded {}: ID is {}!'.format(download_link, result['uuid']))
def main(urls, server, username, password):
    """Run the podcast synchronisation to completion.

    Synchronous wrapper around the ``_main`` coroutine.

    Args:
        urls: Feed URLs to mirror.
        server: Base URL of the Funkwhale server.
        username: Funkwhale account name.
        password: Funkwhale account password.
    """
    print("Starting event loop…")
    # asyncio.run creates a fresh event loop and closes it afterwards.
    # The earlier get_event_loop()/run_until_complete() pair never closed
    # the loop and is deprecated when no loop is running.
    asyncio.run(
        _main(urls=urls, server=server, username=username, password=password)
    )
    print("Done!")
if __name__ == "__main__":
    # Command-line entry point: read the settings from the env file given
    # as the only argument, then start the synchronisation.
    parser = argparse.ArgumentParser()
    parser.add_argument("config_file", help="Env file to use")
    args = parser.parse_args()
    load_dotenv(dotenv_path=args.config_file)

    # Report every missing setting at once instead of a bare KeyError.
    required_settings = (
        "FUNKWHALE_SERVER_URL",
        "FUNKWHALE_USERNAME",
        "FUNKWHALE_PASSWORD",
        "PODCAST_URLS",
    )
    missing_settings = [
        name for name in required_settings if name not in os.environ
    ]
    if missing_settings:
        parser.error("Missing settings: {}".format(", ".join(missing_settings)))

    # The API paths start with "/", so drop a trailing slash to avoid
    # requesting "//api/v1/...".
    funkwhale_server = os.environ["FUNKWHALE_SERVER_URL"].rstrip("/")
    funkwhale_username = os.environ["FUNKWHALE_USERNAME"]
    funkwhale_password = os.environ["FUNKWHALE_PASSWORD"]
    # split() without arguments separates on any run of whitespace and
    # never yields empty items.
    feed_urls = os.environ["PODCAST_URLS"].split()
    main(
        urls=feed_urls,
        server=funkwhale_server,
        username=funkwhale_username,
        password=funkwhale_password,
    )
......@@ -43,9 +43,7 @@ def update_tags(filename, metadata):
m_file.save()
def main(
files, album, artist, strip_prefix, strip_suffix, tags, date_format
):
def main(files, album, artist, strip_prefix, strip_suffix, tags, date_format):
tags = json.loads(tags)
print("Updating metadata for {} files…".format(len(files)))
......@@ -71,9 +69,7 @@ if __name__ == "__main__":
parser.add_argument("file", help="Files to update", nargs="+")
parser.add_argument("--album", help="Album name to use", required=True)
parser.add_argument("--artist", help="Artist name to use", required=True)
parser.add_argument(
"--tags", help="Additional tags, in JSON format", default="{}"
)
parser.add_argument("--tags", help="Additional tags, in JSON format", default="{}")
parser.add_argument(
"--strip-prefix", help="Remove this prefix from the filename", default=""
)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment