~tieong/libreoffice-helpers

fe0d1692870db09f5268947a5114c043b84a88bb — Thomas Ieong 1 year, 1 day ago
Init
2 files changed, 269 insertions(+), 0 deletions(-)

A README.org
A libreoffice_helpers.py
A  => README.org +29 -0
@@ 1,29 @@
* LibreOffice helpers

Bibliothèque regroupant quelques routines pour manipuler LibreOffice depuis Python.

Pour plus d'informations voir le livre Professional UNO/wiki de LibreOffice.

* Utilisation

#+begin_src python
from pathlib import Path
from com.sun.star.beans import PropertyValue
from libreoffice_helpers import (
    get_used_area,
    open_ods_file,
    LOConnect
)

my_path = Path("foo")

properties = (
    PropertyValue(Name='Hidden', Value=True),
    PropertyValue(Name="ReadOnly", Value=True),
)

with LOConnect(use_socket=False, use_temp_profile=True) as xContext:
    smgr = xContext.ServiceManager
    desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", xContext)
    open_ods_file(my_path, desktop, properties)
#+end_src


A  => libreoffice_helpers.py +240 -0
@@ 1,240 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import uno
import os
import random
import logging
import subprocess
import tempfile
import shutil
import errno
from sys import platform
from time import sleep
from pathlib import Path
from typing import Tuple
from com.sun.star.beans import PropertyValue
from com.sun.star.connection import NoConnectException
from com.sun.star.uno import Exception as UnoException
from com.sun.star.io import IOException
from com.sun.star.lang import IllegalArgumentException


def get_used_area(sheet):
    """Return a cell cursor covering the used area of ``sheet``.

    A new cursor is created on the sheet, moved to the end of the used
    area, and then moved to the start of the used area with the second
    call's flag set to True, so the returned cursor spans the whole
    used range.
    """
    used_range = sheet.createCursor()

    # Order matters: jump to the last used cell first (flag False) ...
    used_range.gotoEndOfUsedArea(False)
    # ... then go back to the first used cell with the flag set to True.
    used_range.gotoStartOfUsedArea(True)

    return used_range

def open_ods_file(
        ods_file_path: Path,
        desktop: "com.sun.star.frame.Desktop",
        properties: "Tuple[PropertyValue, ...]"
):
    """
    Open a Calc document through an existing LibreOffice desktop.

    Whether a LibreOffice window shows up is driven by ``properties``
    (presumably a ``Hidden=True`` PropertyValue keeps the document
    invisible -- confirm against the LibreOffice media descriptor docs).

    Args:
        ods_file_path: Location of the document. A ``Path`` is expected;
            a plain string path is accepted as well.
        desktop: The desktop service used to load the document.
        properties: Load properties forwarded to ``loadComponentFromURL``.

    Returns:
        The loaded document component.

    Raises:
        FileNotFoundError: ``ods_file_path`` does not exist.
        ValueError: LibreOffice returned no document.

    Any UNO exception raised by ``loadComponentFromURL`` (for instance
    IOException or IllegalArgumentException) propagates unchanged.
    """
    # Accept plain strings too; Path(...) is a no-op for Path objects.
    ods_file_path = Path(ods_file_path)
    if not ods_file_path.exists():
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), ods_file_path)

    # The UNO helper only takes str, not Path objects. The path is made
    # absolute first so that a relative path still yields a complete
    # file:// URL.
    url = uno.systemPathToFileUrl(str(ods_file_path.absolute()))
    doc = desktop.loadComponentFromURL(url, '_default', 0, properties)

    # According to this
    # https://api.libreoffice.org/docs/idl/ref/interfacecom_1_1sun_1_1star_1_1frame_1_1XComponentLoader.html
    # it just returns null when loading fails, so the reason is unknown.
    if doc is None:
        raise ValueError("Could not load the document!")

    return doc


class BootstrapException(UnoException):
    """Raised when the LibreOffice process cannot be started or reached."""


class LOConnect:
    """Context manager that starts a LibreOffice process and connects to it.

    Entering the context launches ``soffice`` and returns the remote
    component context; leaving it asks the desktop to terminate, kills
    the process if that fails, and removes the temporary user profile
    (if one was created).
    """

    # One initial terminate() call plus up to 50 retries.
    _TERMINATE_ATTEMPTS = 51
    # Seconds to wait before deleting the temporary user profile.
    _PROFILE_RELEASE_DELAY = 5
    # Seconds slept between two connection attempts.
    _CONNECT_RETRY_DELAY = 0.5
    # taskkill exit code treated as "process already gone".
    _TASKKILL_PROCESS_NOT_FOUND = 128

    def __init__(
            self,
            *,
            timeout: int = 120,
            use_temp_profile: bool,
            use_socket: bool,
            port: str = "",
            interface: str = ""
    ):
        """
        Args

            timeout: Number of tries to connect to the LibreOffice server.
            Each failed try is followed by a half-second sleep, so the
            default of 120 waits roughly one minute.

            use_temp_profile: Start LibreOffice with its own temporary
            user profile. Enable this whenever you can: LibreOffice
            instances sharing one user profile cannot work in parallel
            (e.g. converting documents in dirA and dirB at the same time
            ends up with a single process doing all the work). To get
            several instances working simultaneously, each one needs its
            own profile.

            Creating a temporary profile is a little slower; on slow
            disks it can take around a minute, so tune ``timeout``
            according to your observations.

            use_socket: Connect through a socket instead of the default
            named pipe.

            port: The port to connect to when use_socket is on.

            interface: Interface to use when using sockets
            (e.g localhost, 127.0.0.1 etc).
        """
        self.soffice = None
        self.process = None
        self.tmpdir = ""
        self.tmp_uri = ""
        self.timeout = timeout
        self.use_temp_profile = use_temp_profile
        self.use_socket = use_socket
        self.port = port
        self.interface = interface

    def __enter__(self):
        return self.bootstrap()

    def __exit__(self, type, value, traceback):
        """Shut LibreOffice down and remove the temporary profile.

        The profile cleanup runs even when the shutdown itself raises.
        """
        try:
            if self.soffice is not None and not self._terminate_desktop():
                logging.error(
                    (
                        "Impossible de quitter LibreOffice proprement ! "
                        "On tue le processus de LibreOffice via le gestionnaire des tâches."
                    )
                )
                self._kill_process()
        finally:
            self._remove_tmpdir()

    def bootstrap(self):
        """Start soffice and return the remote component context.

        The soffice process is started listening on a randomly named pipe
        (or on the configured socket), then the local context is used to
        resolve the remote one. The ServiceManager is available as the
        ``ServiceManager`` attribute of the returned object.

        Raises:
            BootstrapException: the process could not be started or no
                connection could be established within ``timeout`` tries.
                In that case the spawned process is killed and the
                temporary profile removed before the exception is raised.
        """
        try:
            xContext = self._launch_and_connect()
        except BootstrapException:
            self._abort_startup()
            raise
        except Exception as e:  # Any other exception
            self._abort_startup()
            raise BootstrapException("Caught exception " + str(e), None) from e
        except BaseException:
            # KeyboardInterrupt / SystemExit: do not leave soffice running.
            self._abort_startup()
            raise

        self.soffice = xContext

        return xContext

    def _launch_and_connect(self):
        """Spawn soffice and poll until the UNO connection resolves."""
        # soffice script used on *ix, Mac; soffice.exe used on Win.
        # Without UNO_PATH we rely on soffice being found via PATH.
        soffice = os.path.join(os.environ.get("UNO_PATH", ""), "soffice")
        if platform.startswith("win"):
            soffice += ".exe"

        cmd = [
            soffice,
            "--headless",
            "--invisible",
            "--nocrashreport",
            "--nodefault",
            "--quickstart=no",
            "--nologo",
            "--nofirststartwizard",
            "--norestore",
        ]

        if self.use_temp_profile:
            self.tmpdir = tempfile.mkdtemp()
            self.tmp_uri = Path(self.tmpdir).as_uri()
            cmd.append("-env:UserInstallation={}".format(self.tmp_uri))

        if self.use_socket:
            accept = (
                "socket,host=%s,port=%s,tcpNoDelay=1;urp;StarOffice.ComponentContext"
                % (self.interface, self.port)
            )
            connect_url = "uno:socket,host={},port={};urp;StarOffice.ComponentContext".format(
                self.interface, self.port
            )
        else:
            # Random pipe name; SystemRandom leaves the global random
            # state (and any seed set by the caller) untouched.
            pipe_name = "uno" + str(random.SystemRandom().random())[2:]
            accept = "pipe,name={};urp;StarOffice.ComponentContext".format(pipe_name)
            connect_url = "uno:pipe,name={};urp;StarOffice.ComponentContext".format(pipe_name)
        cmd.append("--accept={}".format(accept))

        logging.debug("Lancement de LibreOffice avec cette commande '%s'", cmd)

        # The exit status is not checked: if the office dies the
        # connection attempts below simply run out.
        self.process = subprocess.Popen(cmd)

        local_context = uno.getComponentContext()
        resolver = local_context.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", local_context)

        # Wait until the office answers, for at most self.timeout tries.
        tries_left = self.timeout
        while True:
            try:
                return resolver.resolve(connect_url)
            except NoConnectException:
                tries_left -= 1
                if tries_left <= 0:
                    raise BootstrapException("Cannot connect to soffice server.", None)
                logging.debug("Trying to connect to LO, we'are at the '%s'nth try", tries_left)
                sleep(self._CONNECT_RETRY_DELAY)

    def _terminate_desktop(self):
        """Ask the desktop to terminate; return True once it agrees."""
        smgr = self.soffice.ServiceManager
        desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", self.soffice)
        for _ in range(self._TERMINATE_ATTEMPTS):
            if desktop.terminate():
                return True
        return False

    def _kill_process(self):
        """Forcefully stop the spawned soffice process, if there is one."""
        if self.process is None:
            return

        failure_message = "Erreur lors de la fermeture de LibreOffice via la manière forte !"
        if platform == "win32":
            try:
                # /T also kills the child processes of the launcher.
                subprocess.run(['taskkill', '/F', '/T', '/PID', str(self.process.pid)], check=True)
            except subprocess.CalledProcessError as e:
                # Race condition: the process may have exited meanwhile.
                if e.returncode != self._TASKKILL_PROCESS_NOT_FOUND:
                    logging.exception(failure_message)
            except OSError:
                logging.exception(failure_message)
        else:
            try:
                self.process.kill()
            except OSError:
                logging.exception(failure_message)

    def _remove_tmpdir(self):
        """Delete the temporary user profile; failures are only logged."""
        if not self.tmpdir:
            return

        # We need to wait a little bit before trying to remove the old
        # user profile (only relevant when an office process was started).
        if self.process is not None:
            sleep(self._PROFILE_RELEASE_DELAY)
        try:
            shutil.rmtree(self.tmpdir)
        except OSError:
            logging.warning("Could not delete LibreOffice user profiel tmpdir at '%s'", self.tmpdir)

    def _abort_startup(self):
        """Release what a failed bootstrap() left behind."""
        self._kill_process()
        self._remove_tmpdir()