Recently, while working on a Firebase project, I needed a tool that could back up and restore Firestore databases, as well as extract production data for local or staging environments. There is a commercial product, Firefoo (the "Powerful GUI Client for Firebase Firestore"), which is easy to use, but it is quite slow and hard to automate.

However, there is an official way to achieve this with the gcloud CLI: export the data to a Cloud Storage bucket, then copy it to local storage through an rclone remote.

gcloud firestore export gs://REDACTED.appspot.com/backup-$DATE
rclone copy -P REDACTED:REDACTED.appspot.com/backup-$DATE/ .
firebase emulators:start --import backup-$DATE
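
Here $DATE is just a unique label for the backup, and REDACTED: is an rclone remote configured for the project's bucket; one way to set the label (assuming a POSIX shell):

DATE=$(date +%Y-%m-%d)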

But if you ever want actual control, for example filtering with the excellent jq, you need a way to export the data to JSON. The script below handles the basic operations, including sub-collections, and it copes with very large collections because it streams documents and writes JSONL.

To export the whole database, list the collections and dump each one:

curl -X POST "http://REDACTED/maintainance?secret=$SECRET&action=listCol" | xargs -I{} bash -c "echo {} && curl -X POST 'http://REDACTED/maintainance?secret=$SECRET&action=getCol&col={}&withSubCollections=1' | jq -c . > {}.jsonl"
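
With each collection in its own .jsonl file, filtering becomes plain jq. For example, to keep only documents whose data has a given field (users.jsonl and isActive are hypothetical names):

jq -c 'select(.data.isActive == true)' users.jsonl > users-active.jsonl

The handler itself: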
import express from "express";
import { initializeApp } from "firebase-admin/app";
import {
  getFirestore,
  CollectionReference,
  DocumentData,
  DocumentReference,
  QueryDocumentSnapshot,
} from "firebase-admin/firestore";
// The jsonlParser helper is assumed to wrap stream-json's JSONL parser,
// which emits one { key, value } pair per input line.
import JsonlParser from "stream-json/jsonl/Parser";

initializeApp();
const app = express();
const db = getFirestore();
// Shared secret guarding this endpoint; where it comes from is up to you.
const SECRET = process.env.SECRET;

const jsonlParser = () => new JsonlParser();

/**
 * Maintenance actions
 */
app.post("/maintainance", (req, res) => {
  (async () => {
    if (req.query.secret !== SECRET) {
      res.status(403).json({ error: "what do you want from me?" });
      return;
    }

    const allowActions = [
      "count",
      "listCol",
      "getCol",
      "importCol",
      "DANGEROUSLY_delCol",
    ] as const;

    const action = req.query.action as unknown as typeof allowActions[number];

    if (action === "count") {
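      // select() with no arguments retrieves no field data, so the count
      // stays cheap even on large collections.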
      const cols = (req.query.col as string).split(","); // let it crash
      const count: { [k: string]: number } = {};
      for (const col of cols) {
        const snapshot = await db.collection(col).select().get();
        count[col] = snapshot.size;
      }
      res.json({ count });
    } else if (action === "getCol") {
      const rootCol = db.collection(req.query.col as string); // let it crash
      const withSubCollections = !!req.query.withSubCollections;

      /**
       * IDs of sub-collections discovered so far; each is later drained
       * with a collection-group query. Exported ids become doc/col/doc/...
       * paths relative to req.query.col as the root collection.
       */
      const subCollectionsQueue = new Set<string>();
      let pending: Promise<unknown>[] = [];

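      // stream() yields documents one at a time, so the collection never
      // has to fit in memory as a whole.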
      const snapshot =
        rootCol.stream() as unknown as AsyncIterableIterator<QueryDocumentSnapshot>;

      for await (const doc of snapshot) {
        res.write(JSON.stringify({ id: doc.id, data: doc.data() }) + "\n"); // one JSON object per line (JSONL)
        if (withSubCollections) {
          pending.push(
            (async () => {
              const subCols = await doc.ref.listCollections();
              if (subCols.length > 0) {
                for (const subCol of subCols) {
                  if (!subCollectionsQueue.has(subCol.id)) {
                    console.log("  queue subCollection", [
                      req.query.col,
                      subCol.id,
                    ]);
                    subCollectionsQueue.add(subCol.id);
                  }
                }
              }
            })()
          );
        }
      }

      if (withSubCollections) {
        if (pending.length > 0) {
          console.log(`wait for pending operations: ${req.query.col}`);
          await Promise.all(pending);
          pending = [];
        }

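        // Drain discovered sub-collections breadth-first: each id becomes
        // a collection-group query, which may enqueue deeper ones.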
        while (subCollectionsQueue.size > 0) {
          const path = subCollectionsQueue.values().next().value as string;
          subCollectionsQueue.delete(path);
          const col = db.collectionGroup(path);
          const snapshot =
            col.stream() as unknown as AsyncIterableIterator<QueryDocumentSnapshot>;
          for await (const doc of snapshot) {
            pending.push(
              (async () => {
                const subCols = await doc.ref.listCollections();
                if (subCols.length > 0) {
                  for (const subCol of subCols) {
                    if (!subCollectionsQueue.has(subCol.id)) {
                      console.log("  queue subCollection", [
                        req.query.col,
                        subCol.id,
                      ]);
                      subCollectionsQueue.add(subCol.id);
                    }
                  }
                }
              })()
            );
            // Reconstruct the document's path by walking parent refs up
            // to the root collection.
            let parent: CollectionReference | DocumentReference | null =
              doc.ref.parent;
            const paths = [parent.id, doc.id];
            while (true) {
              parent = parent.parent;
              if (!parent) {
                break;
              }
              paths.unshift(parent.id);
            }
            const rootName = paths.shift();
            if (rootName === req.query.col) {
              res.write(
                JSON.stringify({
                  id: paths.join("/"),
                  data: doc.data(),
                }) + "\n"
              );
            }
          }
          if (pending.length > 0) {
            console.log(
              `wait for pending operations: ${req.query.col},... ${path}`
            );
            await Promise.all(pending);
            pending = [];
          }
        }
      }

      res.end();
    } else if (action === "listCol") {
      const cols = await db.listCollections();
      res.end(cols.map((it) => it.id).join("\n"));
    } else if (action === "importCol") {
      const col = db.collection(req.query.col as string); // let it crash
      const withSubCollections = !!req.query.withSubCollections;
      const pipeline = req.pipe(
        jsonlParser()
      ) as unknown as AsyncIterableIterator<{
        value: { id: string; data: DocumentData };
      }>;
      let count = 0;
      let batch = db.batch();
      let cap = 0;

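      // A Firestore batch allows at most 500 writes; commit and start a
      // fresh one before hitting the limit.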
      for await (const item of pipeline) {
        count += 1;
        if (cap > 400) {
          await batch.commit();
          cap = 0;
          batch = db.batch();
        }

        cap = cap + 1;
        if (withSubCollections) {
          // Exported ids look like "doc/subCol/doc/..."; rebuild the
          // nested reference pair by pair, at any depth.
          const path = item.value.id.split("/");
          let ref = col.doc(path.shift()!);
          while (path.length > 1) {
            ref = ref.collection(path.shift()!).doc(path.shift()!);
          }
          batch.create(ref, item.value.data);
        } else {
          const ref = col.doc(item.value.id);
          batch.create(ref, item.value.data);
        }
      }

      await batch.commit();
      res.json({ count });
    } else if (action === "DANGEROUSLY_delCol") {
      const cols = (req.query.col as string).split(","); // let it crash
      const count: { [k: string]: number } = {};
      for (const col of cols) {
        const snapshot = await db.collection(col).select().get();
        let batch = db.batch();
        let cap = 0;
        for (const item of snapshot.docs) {
          if (cap > 400) {
            await batch.commit();
            cap = 0;
            batch = db.batch();
          }

          cap = cap + 1;
          batch.delete(item.ref);
        }
        await batch.commit();
        count[col] = snapshot.size;
      }
      res.json({ count });
    } else {
      res.end(allowActions.join("\n"));
    }
  })();
});
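
To restore a collection, stream a JSONL file back into the importCol action (this assumes nothing upstream, such as a body-parsing middleware, consumes the request stream first; the host and file names are placeholders):

curl -X POST --data-binary @users.jsonl "http://REDACTED/maintainance?secret=$SECRET&action=importCol&col=users&withSubCollections=1"

Note that the import uses batch.create, which fails for documents that already exist; clear the target with DANGEROUSLY_delCol first, or swap in batch.set if overwrites are what you want.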