/*
 *  $Id: ttffile.c 22626 2019-10-30 09:14:30Z yeti-dn $
 *  Copyright (C) 2019 David Necas (Yeti).
 *  E-mail: yeti@gwyddion.net.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 */

/* XXX: The data part of Corning TTF files seems to be in tag
 * CORNING_TIFFTAG_DATA and it is compressed somehow.  First, the NaNs around
 * the images are removed (by specifying just counts probably), but the rest
 * is still represented in some strange manner.  A zero-filled file, for
 * instance, has mostly zero-filled data blocks, but with only ~ 3 bits per
 * pixel.  A normal file with actual data takes approximately 4x that and
 * the data mostly look like random numbers.  It is either compression, or
 * some more sophisticated transformation.  */

/******************** BREAK THE PARSER
 * [FILE-MAGIC-USERGUIDE]
 * Corning Tropel UltraSort topographical data
 * .ttf
 * Read
 **/

/**
 * [FILE-MAGIC-USERGUIDE]
 * Corning Tropel exported CSV data
 * .csv
 * Read
 **/

/**
 * [FILE-MAGIC-MISSING]
 * Indistinguishable from CSV.  Avoiding a clash with a standard file format.
 * Module broken.
 **/

#include "config.h"
#include <stdlib.h>
#include <libgwyddion/gwymath.h>
#include <libprocess/correct.h>
#include <app/gwymoduleutils-file.h>
#include <app/data-browser.h>
#include "err.h"
#include "gwytiff.h"

/* Leading bytes of a Corning CSV export.  Both ccsv_detect() and ccsv_load()
 * compare the start of the file with this prefix.  MAGIC_SIZE is its length
 * without the terminating NUL. */
#define MAGIC "Mapid: "
#define MAGIC_SIZE (sizeof(MAGIC)-1)

/* Corning-private TIFF data type and tag numbers.
 *
 * NB: These are decimal values, not 0x8000 as one would expect. */
enum {
    /* This is simply their private alias of GWY_TIFF_DOUBLE.  Dunno why they
     * have it.  fix_corning_double_tags() rewrites it to GWY_TIFF_DOUBLE. */
    CORNING_TIFF_DOUBLE   = 8000,

    /* Only the presence of this tag is checked during detection. */
    CORNING_TIFFTAG_FIRST = 8001,
    /* Detection requires this tag to have type CORNING_TIFF_DOUBLE.
     * Presumably the lateral pixel size -- TODO confirm. */
    CORNING_TIFFTAG_DX    = 8009,
    /* Read as unsigned integers during detection; presumably the image
     * dimensions in pixels -- TODO confirm. */
    CORNING_TIFFTAG_XRES  = 8152,
    CORNING_TIFFTAG_YRES  = 8153,
    /* Detection requires this tag to have type GWY_TIFF_LONG.  It seems to
     * hold the (encoded) image data, see the note at the top of the file. */
    CORNING_TIFFTAG_DATA  = 8154,
};

/* Information extracted from the text header of a Corning CSV export by
 * ccsv_read_header(). */
typedef struct {
    /* Numbers of columns and rows, taken from the "Size: <xres>x<yres>"
     * field. */
    gint xres;
    gint yres;
    /* Physical dimensions passed to gwy_data_field_new(); the lateral unit of
     * the resulting field is set to "m". */
    gdouble xreal;
    gdouble yreal;
    /* Value of the "Units:" field.  Allocated; ccsv_load() releases it with
     * g_free(). */
    gchar *units;
} CorningCSVHeader;

/* Forward declarations of the file-local functions: module registration,
 * the TIFF-based (ttf_*) importer with its tag fix-up helper, and the CSV
 * export (ccsv_*) importer with its header and data parsers. */
static gboolean      module_register        (void);
static gint          ttf_detect             (const GwyFileDetectInfo *fileinfo,
                                             gboolean only_name);
static GwyContainer* ttf_load               (const gchar *filename,
                                             GwyRunType mode,
                                             GError **error);
static gboolean      fix_corning_double_tags(GwyTIFF *tiff,
                                             GError **error);
static gint          ccsv_detect            (const GwyFileDetectInfo *fileinfo,
                                             gboolean only_name);
static GwyContainer* ccsv_load              (const gchar *filename,
                                             GwyRunType mode,
                                             GError **error);
static gchar*        ccsv_read_header       (CorningCSVHeader *header,
                                             gchar *p,
                                             GError **error);
static gboolean      ccsv_read_images       (const CorningCSVHeader *header,
                                             gchar *p,
                                             GwyDataField **dfield,
                                             GwyDataField **mask,
                                             GError **error);

/* Module description record: ABI version, the registration callback
 * module_register(), a translatable description and author/version/date
 * strings.  It is exported under the module name "ttffile" by
 * GWY_MODULE_QUERY2() below. */
static GwyModuleInfo module_info = {
    GWY_MODULE_ABI_VERSION,
    module_register,
    N_("Imports Corning Tropel UltraSort files."),
    "Yeti <yeti@gwyddion.net>",
    "0.1",
    "David Nečas (Yeti)",
    "2019",
};

GWY_MODULE_QUERY2(module_info, ttffile)

static gboolean
module_register(void)
{
    gwy_file_func_register("ttffile",
                           N_("Corning Tropel UltraSort data (.ttf)"),
                           (GwyFileDetectFunc)&ttf_detect,
                           (GwyFileLoadFunc)&ttf_load,
                           NULL,
                           NULL);
    gwy_file_func_register("corningcsvfile",
                           N_("Corning Tropel UltraSort CSV export (.csv)"),
                           (GwyFileDetectFunc)&ccsv_detect,
                           (GwyFileLoadFunc)&ccsv_load,
                           NULL,
                           NULL);

    return TRUE;
}

/* Detect Corning TTF files.
 *
 * Name-only detection is not supported and scores 0.  Otherwise the file
 * head must look like a TIFF; then the whole file is loaded with GwyTIFF and
 * must contain the Make and Model strings, the first Corning tag, a DX tag of
 * the Corning double type, a DATA tag of type LONG and the XRES and YRES
 * integers.  Returns 100 if everything is present and 0 otherwise. */
static gint
ttf_detect(const GwyFileDetectInfo *fileinfo, gboolean only_name)
{
    GwyTIFFVersion version = GWY_TIFF_CLASSIC;
    guint byteorder = G_LITTLE_ENDIAN;
    gchar *make = NULL, *model = NULL;
    const GwyTIFFEntry *tag;
    GwyTIFF *tiff;
    guint xres, yres;
    guint score = 0;

    if (only_name)
        return score;

    /* Weed out non-TIFFs */
    if (!gwy_tiff_detect(fileinfo->head, fileinfo->buffer_len,
                         &version, &byteorder))
        return 0;

    /* Use GwyTIFF for detection to avoid problems with fragile libtiff.
     * Progressively try more fine tests; the first failing one ends the
     * detection with zero score. */
    tiff = gwy_tiff_load(fileinfo->name, NULL);
    if (!tiff)
        goto finish;

    /* We could check these for "UltraSort" and "Corning Tropel", but who knows
     * how stable they are.  So only their presence is required. */
    if (!gwy_tiff_get_string0(tiff, GWY_TIFFTAG_MAKE, &make))
        goto finish;
    if (!gwy_tiff_get_string0(tiff, GWY_TIFFTAG_MODEL, &model))
        goto finish;

    if (!gwy_tiff_find_tag(tiff, 0, CORNING_TIFFTAG_FIRST))
        goto finish;

    tag = gwy_tiff_find_tag(tiff, 0, CORNING_TIFFTAG_DX);
    if (!tag || tag->type != (guint)CORNING_TIFF_DOUBLE)
        goto finish;

    tag = gwy_tiff_find_tag(tiff, 0, CORNING_TIFFTAG_DATA);
    if (!tag || tag->type != (guint)GWY_TIFF_LONG)
        goto finish;

    if (!gwy_tiff_get_uint0(tiff, CORNING_TIFFTAG_XRES, &xres))
        goto finish;
    if (!gwy_tiff_get_uint0(tiff, CORNING_TIFFTAG_YRES, &yres))
        goto finish;

    score = 100;

finish:
    g_free(model);
    g_free(make);
    if (tiff)
        gwy_tiff_free(tiff);

    return score;
}

/* Load a Corning TTF file.
 *
 * The data encoding is not understood (see the note at the top of the file),
 * so this function never produces a container: the TIFF is loaded, the
 * Corning double tags are fixed up and validated, and then the "no data"
 * error is reported.  Always returns NULL with @error set by whichever step
 * failed. */
static GwyContainer*
ttf_load(const gchar *filename,
         G_GNUC_UNUSED GwyRunType mode,
         GError **error)
{
    GwyTIFF *tiff = gwy_tiff_load(filename, error);

    if (!tiff)
        return NULL;

    if (fix_corning_double_tags(tiff, error)) {
#if 0
        /* Development aid: dump the raw content of the data tag. */
        {
            const GwyTIFFEntry *entry;
            guint offset, itemsize;
            const guchar *p;

            entry = gwy_tiff_find_tag(tiff, 0, CORNING_TIFFTAG_DATA);
            p = entry->value;
            offset = tiff->get_length(&p);
            itemsize = gwy_tiff_data_type_size(entry->type);
            g_file_set_contents("ttf-dump.dat",
                                tiff->data + offset, entry->count * itemsize,
                                NULL);
        }
#endif
        /* The tags are fine, but we cannot decode the image. */
        err_NO_DATA(error);
    }

    gwy_tiff_free(tiff);

    return NULL;
}

/* Walk all tags in all directories of @tiff and replace the private type
 * CORNING_TIFF_DOUBLE with GWY_TIFF_DOUBLE.  Then let GwyTIFF revalidate the
 * tags, which is the easiest way of dealing with them.
 *
 * Returns the result of gwy_tiff_tags_valid(), which also receives @error. */
static gboolean
fix_corning_double_tags(GwyTIFF *tiff, GError **error)
{
    guint idir, itag;

    for (idir = 0; idir < tiff->dirs->len; idir++) {
        GArray *dir = (GArray*)g_ptr_array_index(tiff->dirs, idir);

        for (itag = 0; itag < dir->len; itag++) {
            GwyTIFFEntry *tag = &g_array_index(dir, GwyTIFFEntry, itag);

            if ((guint)tag->type != CORNING_TIFF_DOUBLE)
                continue;
            tag->type = GWY_TIFF_DOUBLE;
        }
    }

    return gwy_tiff_tags_valid(tiff, error);
}

/* Detect Corning CSV exports.
 *
 * Name-only detection scores 0, as does any file not starting with MAGIC.
 * Otherwise the score starts at 0 and grows with each characteristic string
 * found in the file head: a counter starts at 100 and is reduced to 2/3
 * (integer arithmetic) per string found, and the score is 100 minus the
 * counter. */
static gint
ccsv_detect(const GwyFileDetectInfo *fileinfo,
            gboolean only_name)
{
    /* They may not all be there; for instance, of the last four we expect
     * about two... */
    static const gchar *const keywords[] = {
        "Time: ", "Size: ", "Zoom: ", "Units: ", "ZRes: ", "Outside: ",
        "Sensitivity: ", "Scale: ", "Mapformat: ",
        "Tropel", "Corning", "UltraSort", "TMSPlot",
    };
    guint k, remaining = 100;

    if (only_name)
        return 0;

    if (strncmp(fileinfo->head, MAGIC, MAGIC_SIZE) != 0)
        return 0;

    for (k = 0; k < G_N_ELEMENTS(keywords); k++) {
        if (!strstr(fileinfo->head, keywords[k]))
            continue;

        gwy_debug("found %s", keywords[k]);
        remaining = 2*remaining/3;
    }
    gwy_debug("is_not %d", remaining);

    return 100 - remaining;
}

/* Load a Corning CSV export.
 *
 * The whole file is read into memory, checked for the MAGIC prefix, and its
 * header and data parts are parsed by ccsv_read_header() and
 * ccsv_read_images().  On success a new container holding the image as
 * channel 0 together with its mask is returned; on failure NULL is returned
 * and @error is set.  The file text, the units string and the local
 * references to the data fields are released on every path. */
static GwyContainer*
ccsv_load(const gchar *filename,
          G_GNUC_UNUSED GwyRunType mode,
          GError **error)
{
    GwyDataField *field = NULL, *maskfield = NULL;
    GwyContainer *data = NULL;
    CorningCSVHeader hdr;
    GError *gerr = NULL;
    gchar *text, *body;
    gsize len;

    gwy_clear(&hdr, 1);
    if (!g_file_get_contents(filename, &text, &len, &gerr)) {
        err_GET_FILE_CONTENTS(error, &gerr);
        return NULL;
    }

    if (strncmp(text, MAGIC, MAGIC_SIZE) != 0) {
        err_FILE_TYPE(error, "Corning CSV");
        goto cleanup;
    }

    /* The header parser returns the position where the data start. */
    body = ccsv_read_header(&hdr, text, error);
    if (!body)
        goto cleanup;

    /* Validate both dimensions before any data field is allocated. */
    if (err_DIMENSION(error, hdr.xres))
        goto cleanup;
    if (err_DIMENSION(error, hdr.yres))
        goto cleanup;

    if (!ccsv_read_images(&hdr, body, &field, &maskfield, error))
        goto cleanup;

    data = gwy_container_new();
    gwy_container_set_object(data, gwy_app_get_data_key_for_id(0), field);
    gwy_container_set_object(data, gwy_app_get_mask_key_for_id(0), maskfield);
    gwy_app_channel_title_fall_back(data, 0);
    gwy_file_channel_import_log_add(data, 0, NULL, filename);

cleanup:
    GWY_OBJECT_UNREF(maskfield);
    GWY_OBJECT_UNREF(field);
    g_free(text);
    g_free(hdr.units);

    return data;
}

/* Release a GRegex or GMatchInfo held in a variable, if any, and reset the
 * variable to NULL so that the release can be safely repeated.  Each macro
 * expands to a single statement, so it can be used as the body of an unbraced
 * if/else. */
#define free_regex(r) \
    do { if (r) g_regex_unref(r); (r) = NULL; } while (0)
#define free_matchinfo(i) \
    do { if (i) g_match_info_free(i); (i) = NULL; } while (0)

/* The header is split to lines, but the split is somewhat arbitrary.
 * Especially when a field is empty, the next field tends to continue on the
 * same line.  Do not try to parse it as a well-formatted header... */
static gchar*
ccsv_read_header(CorningCSVHeader *header,
                 gchar *p, GError **error)
{
    GMatchInfo *info = NULL;
    GRegex *regex;
    gchar *s, *retval = NULL;
    gdouble mmp, pmm;

    s = strstr(p, "Units: ");
    if (!s) {
        err_MISSING_FIELD(error, "Units");
        return NULL;
    }

    s += sizeof("Units: ")-1;
    while (*s != '\0' && *s != '\n' && *s != '\r')
        s++;
    if (*s == '\0') {
        err_TRUNCATED_HEADER(error);
        return NULL;
    }
    *s = '\0';

    regex = g_regex_new("\\bSize:\\s*(?P<xres>[0-9]+)x(?P<yres>[0-9]+)",
                        G_REGEX_NO_AUTO_CAPTURE, 0, NULL);
    g_return_val_if_fail(regex, NULL);
    if (!g_regex_match(regex, p, 0, &info)) {
        err_MISSING_FIELD(error, "Size");
        goto fail;
    }
    header->xres = atoi(g_match_info_fetch_named(info, "xres"));
    header->yres = atoi(g_match_info_fetch_named(info, "yres"));
    gwy_debug("xres %d, yres %d", header->xres, header->yres);
    free_matchinfo(info);
    free_regex(regex);

    regex = g_regex_new("(?P<pixmm>-?[0-9.]+)\\s+p/mm\\s+"
                        "(?P<mmpix>-?[0-9.]+)\\s+mm/p\\b",
                        G_REGEX_NO_AUTO_CAPTURE
                        | G_REGEX_MULTILINE
                        | G_REGEX_DOTALL, 0, NULL);
    g_return_val_if_fail(regex, NULL);
    if (!g_regex_match(regex, p, 0, &info)) {
        err_MISSING_FIELD(error, "Box");
        goto fail;
    }
    pmm = g_strtod(g_match_info_fetch_named(info, "pixmm"), NULL);
    mmp = g_strtod(g_match_info_fetch_named(info, "mmpix"), NULL);
    free_matchinfo(info);
    free_regex(regex);
    pmm = sqrt(fabs(pmm/mmp));
    header->xreal = 1e-3 * header->xres * pmm;
    header->yreal = 1e-3 * header->yres * pmm;
    gwy_debug("xreal %g, yreal %g", header->xreal, header->yreal);
    if (!(header->xreal > 0.0)) {
        g_warning("Real pixel width is 0.0, fixing to 1.0");
        header->xreal = header->yreal = 1.0;
    }

    regex = g_regex_new("\\bUnits:\\s+(?P<units>\\S+)",
                        G_REGEX_NO_AUTO_CAPTURE, 0, NULL);
    g_return_val_if_fail(regex, NULL);
    if (!g_regex_match(regex, p, 0, &info)) {
        err_MISSING_FIELD(error, "Units");
        goto fail;
    }
    header->units = g_strdup(g_match_info_fetch_named(info, "units"));
    gwy_debug("units %s", header->units);
    free_matchinfo(info);
    free_regex(regex);

    retval = s+1;

fail:
    free_matchinfo(info);
    free_regex(regex);

    return retval;
}

/* Parse the data part of a Corning CSV export.
 *
 * Creates the data field and a mask of the same dimensions, both initially
 * zero-filled, and reads header->yres lines of header->xres values separated
 * by commas and/or whitespace.  Values are scaled by 1e-6 (the file is
 * expected to be in microns; other units only produce a warning) and the
 * height unit is set to metres.  Pixels given as "NaN", and any other value
 * that parses as not finite, are marked in the mask instead of being stored;
 * the masked pixels are finally filled by gwy_data_field_laplace_solve().
 *
 * @header: Parsed header; the dimensions must be valid already.
 * @p: Start of the data part in the writable file content.
 * @dfield, @mask: Locations to store the new fields.  They are set even when
 *                 FALSE is returned; the caller releases them in both cases.
 * @error: Set when FALSE is returned.
 *
 * Returns TRUE on success, FALSE when a line or a value is missing. */
static gboolean
ccsv_read_images(const CorningCSVHeader *header,
                 gchar *p,
                 GwyDataField **dfield, GwyDataField **mask,
                 GError **error)
{
    gdouble *d, *m;
    gdouble value;
    gchar *line, *end;
    gint xres, yres, i, j;
    gsize k;

    xres = header->xres;
    yres = header->yres;

    *dfield = gwy_data_field_new(xres, yres, header->xreal, header->yreal,
                                 TRUE);
    gwy_si_unit_set_from_string(gwy_data_field_get_si_unit_xy(*dfield), "m");
    *mask = gwy_data_field_new_alike(*dfield, TRUE);
    if (!header->units || !gwy_strequal(header->units, "Microns")) {
        g_warning("Units are not Microns, setting to metre anyway.");
    }
    gwy_si_unit_set_from_string(gwy_data_field_get_si_unit_z(*dfield), "m");
    d = gwy_data_field_get_data(*dfield);
    m = gwy_data_field_get_data(*mask);

    /* Skip empty lines between the header and the data. */
    while (*p == '\r' || *p == '\n')
        p++;

    for (i = 0; i < yres; i++) {
        line = gwy_str_next_line(&p);
        if (!line) {
            err_TRUNCATED_PART(error, "data");
            return FALSE;
        }
        for (j = 0; j < xres; j++) {
            /* Compute the index in gsize; i*xres may not fit into gint. */
            k = (gsize)i*xres + j;
            if (strncmp(line, "NaN", 3) == 0) {
                m[k] = 1.0;
                line += 3;
            }
            else {
                value = g_strtod(line, &end);
                if (end == line) {
                    /* Nothing parseable, i.e. the line is too short. */
                    err_TRUNCATED_PART(error, "data");
                    return FALSE;
                }
                /* The number parser may itself produce NaN or infinity,
                 * e.g. for "nan", "-NaN", "inf" or overflowing values.  The
                 * comparison is false for all of them.  Mask such pixels
                 * instead of putting them to the data. */
                if (fabs(value) <= G_MAXDOUBLE)
                    d[k] = 1e-6*value;
                else
                    m[k] = 1.0;
                line = end;
            }
            while (*line == ',' || g_ascii_isspace(*line))
                line++;
        }
    }

    gwy_data_field_laplace_solve(*dfield, *mask, -1, 1.0);

    return TRUE;
}

/* vim: set cin et ts=4 sw=4 cino=>1s,e0,n0,f0,{0,}0,^0,\:1s,=0,g1s,h0,t0,+1s,c3,(0,u0 : */
