hugpv committed on
Commit
8e5930e
1 Parent(s): 0e09020

initial commit via hf

Browse files
Files changed (27) hide show
  1. .gitignore +2 -0
  2. algo_cfgs_all.json +51 -0
  3. analysis_funcs.py +355 -0
  4. app.py +1453 -0
  5. classic_correction_algos.py +546 -0
  6. eyekit_measures.py +178 -0
  7. loss_functions.py +179 -0
  8. models.py +897 -0
  9. models/BERT_20240104-223349_loop_normalize_by_line_height_and_width_True_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00430.ckpt +3 -0
  10. models/BERT_20240104-233803_loop_normalize_by_line_height_and_width_False_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00719.ckpt +3 -0
  11. models/BERT_20240107-152040_loop_restrict_sim_data_to_4000_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00515.ckpt +3 -0
  12. models/BERT_20240108-000344_loop_normalize_by_line_height_and_width_False_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00706.ckpt +3 -0
  13. models/BERT_20240108-011230_loop_normalize_by_line_height_and_width_True_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00560.ckpt +3 -0
  14. models/BERT_20240109-090419_loop_normalize_by_line_height_and_width_False_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00518.ckpt +3 -0
  15. models/BERT_20240122-183729_loop_normalize_by_line_height_and_width_True_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00523.ckpt +3 -0
  16. models/BERT_20240122-194041_loop_normalize_by_line_height_and_width_False_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00462.ckpt +3 -0
  17. models/BERT_fin_exp_20240104-223349.yaml +100 -0
  18. models/BERT_fin_exp_20240104-233803.yaml +100 -0
  19. models/BERT_fin_exp_20240107-152040.yaml +100 -0
  20. models/BERT_fin_exp_20240108-000344.yaml +100 -0
  21. models/BERT_fin_exp_20240108-011230.yaml +100 -0
  22. models/BERT_fin_exp_20240109-090419.yaml +100 -0
  23. models/BERT_fin_exp_20240122-183729.yaml +102 -0
  24. models/BERT_fin_exp_20240122-194041.yaml +102 -0
  25. requirements.txt +25 -0
  26. run_in_notebook.ipynb +0 -0
  27. utils.py +2016 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__/
2
+ .gitignore
algo_cfgs_all.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compare": {
3
+ "x_thresh": 512,
4
+ "n_nearest_lines": 3
5
+ },
6
+ "attach": {},
7
+ "segment": {},
8
+ "split": {},
9
+ "stretch": {
10
+ "stretch_bounds": [
11
+ 0.9,
12
+ 1.1
13
+ ],
14
+ "offset_bounds": [
15
+ -50,
16
+ 50
17
+ ]
18
+ },
19
+ "slice": {
20
+ "x_thresh": 192,
21
+ "y_thresh": 32,
22
+ "w_thresh": 32,
23
+ "n_thresh": 90
24
+
25
+ },
26
+ "warp": {},
27
+ "chain": {
28
+ "x_thresh": 192,
29
+ "y_thresh": 55
30
+ },
31
+ "regress": {
32
+ "slope_bounds": [
33
+ -0.1,
34
+ 0.1
35
+ ],
36
+ "offset_bounds": [
37
+ -50,
38
+ 50
39
+ ],
40
+ "std_bounds": [
41
+ 1,
42
+ 20
43
+ ]
44
+ },
45
+ "cluster": {},
46
+ "merge": {
47
+ "y_thresh": 32,
48
+ "gradient_thresh": 0.1,
49
+ "error_thresh": 20
50
+ }
51
+ }
analysis_funcs.py ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Partially taken and adapted from: https://github.com/jwcarr/eyekit/blob/1db1913411327b108b87e097a00278b6e50d0751/eyekit/measure.py
3
+ Functions for calculating common reading measures, such as gaze duration or
4
+ initial landing position.
5
+ """
6
+
7
+ import pandas as pd
8
+
9
+
10
def fix_in_ia(fix_x, fix_y, ia_x_min, ia_x_max, ia_y_min, ia_y_max):
    """Return True if fixation point (fix_x, fix_y) lies inside the interest
    area's bounding box; bounds are inclusive on all four edges.

    Fix: replaced the verbose `if cond: return True else: return False`
    construction with a direct boolean expression.
    """
    return ia_x_min <= fix_x <= ia_x_max and ia_y_min <= fix_y <= ia_y_max
17
+
18
+
19
def fix_in_ia_default(fixation, ia_row, prefix):
    """Check whether *fixation* (needs .x/.y) falls inside the interest area
    described by *ia_row*, whose bound columns are named f"{prefix}_xmin" etc.
    """
    bounds = (
        ia_row[f"{prefix}_xmin"],
        ia_row[f"{prefix}_xmax"],
        ia_row[f"{prefix}_ymin"],
        ia_row[f"{prefix}_ymax"],
    )
    return fix_in_ia(fixation.x, fixation.y, *bounds)
28
+
29
+
30
def number_of_fixations_own(trial, dffix, prefix="word"):
    """
    Given an interest area and fixation sequence, return the number of
    fixations on that interest area (one row per interest area).
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    records = []
    for ia_idx, ia in ia_df.iterrows():
        # booleans sum to the number of in-area fixations
        n_fix = sum(
            fix_in_ia(
                fx.x,
                fx.y,
                ia[f"{prefix}_xmin"],
                ia[f"{prefix}_xmax"],
                ia[f"{prefix}_ymin"],
                ia[f"{prefix}_ymax"],
            )
            for _, fx in dffix.iterrows()
        )
        records.append(
            {
                f"{prefix}_index": ia_idx,
                prefix: ia[f"{prefix}"],
                "number_of_fixations": n_fix,
            }
        )
    return pd.DataFrame(records)
57
+
58
+
59
def initial_fixation_duration_own(trial, dffix, prefix="word"):
    """
    Given an interest area and fixation sequence, return the duration of the
    first fixation landing on each interest area (0 if none landed).
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    records = []
    for ia_idx, ia in ia_df.iterrows():
        # first in-area fixation's duration, defaulting to 0 when none match
        first_duration = next(
            (fx.duration for _, fx in dffix.iterrows() if fix_in_ia_default(fx, ia, prefix)),
            0,
        )
        records.append(
            {
                f"{prefix}_index": ia_idx,
                prefix: ia[f"{prefix}"],
                "initial_fixation_duration": first_duration,
            }
        )
    return pd.DataFrame(records)
82
+
83
+
84
def first_of_many_duration_own(trial, dffix, prefix="word"):
    """For each interest area that received two or more fixations, report the
    duration of the first of them; areas with fewer than two fixations are
    omitted. Returns an empty DataFrame when no area qualifies.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    records = []
    for ia_idx, ia in ia_df.iterrows():
        in_area_durations = [
            fx.duration for _, fx in dffix.iterrows() if fix_in_ia_default(fx, ia, prefix)
        ]
        if len(in_area_durations) > 1:
            records.append(
                {
                    f"{prefix}_index": ia_idx,
                    prefix: ia[f"{prefix}"],
                    "first_of_many_duration": in_area_durations[0],
                }
            )
    return pd.DataFrame(records) if records else pd.DataFrame()
104
+
105
+
106
def total_fixation_duration_own(trial, dffix, prefix="word"):
    """
    Given an interest area and fixation sequence, return the sum duration of
    all fixations on that interest area.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    records = []
    for ia_idx, ia in ia_df.iterrows():
        total = sum(
            fx.duration for _, fx in dffix.iterrows() if fix_in_ia_default(fx, ia, prefix)
        )
        records.append(
            {
                f"{prefix}_index": ia_idx,
                prefix: ia[f"{prefix}"],
                "total_fixation_duration": total,
            }
        )
    return pd.DataFrame(records)
126
+
127
+
128
def gaze_duration_own(trial, dffix, prefix="word"):
    """
    Given an interest area and fixation sequence, return the gaze duration on
    that interest area: the sum duration of all fixations inside the area
    until the area is exited for the first time after being entered.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    records = []
    for ia_idx, ia in ia_df.iterrows():
        total = 0
        entered = False
        for _, fx in dffix.iterrows():
            if fix_in_ia_default(fx, ia, prefix):
                total += fx.duration
                entered = True
            elif entered:
                # first fixation outside after having been inside ends the pass
                break
        records.append(
            {
                f"{prefix}_index": ia_idx,
                prefix: ia[f"{prefix}"],
                "gaze_duration": total,
            }
        )
    return pd.DataFrame(records)
153
+
154
+
155
def go_past_duration_own(trial, dffix, prefix="word"):
    """
    Given an interest area and fixation sequence, return the go-past time on
    that interest area. Go-past time is the sum duration of all fixations from
    when the interest area is first entered until when it is first exited to
    the right, including any regressions to the left that occur during that
    time period (and vice versa in the case of right-to-left text).
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    results = []

    for cidx, ia_row in ia_df.iterrows():
        entered = False
        go_past_time = 0

        for idx, fixation in dffix.iterrows():
            if fix_in_ia_default(fixation, ia_row, prefix):
                if not entered:
                    entered = True
                go_past_time += fixation.duration
            elif entered:
                # Only an exit past the area's right edge ends accumulation;
                # fixations to the left (regressions) keep adding to the total.
                # NOTE(review): the docstring mentions right-to-left text, but
                # this comparison only handles left-to-right exits — confirm.
                if ia_row[f"{prefix}_xmax"] < fixation.x:  # Interest area has been exited to the right
                    break
                go_past_time += fixation.duration

        results.append({f"{prefix}_index": cidx, prefix: ia_row[f"{prefix}"], "go_past_duration": go_past_time})

    return pd.DataFrame(results)
183
+
184
+
185
def second_pass_duration_own(trial, dffix, prefix="word"):
    """
    Given an interest area and fixation sequence, return the second pass
    duration on that interest area for each word.

    A "pass" is a maximal run of consecutive fixations inside the area; only
    the durations of the second pass are accumulated, and scanning stops at
    the first fixation that exits the second pass.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    durations = []

    for cidx, ia_row in ia_df.iterrows():
        current_pass = None  # pass number while inside the area, else None
        next_pass = 1        # number the next entry into the area will get
        pass_duration = 0
        for idx, fixation in dffix.iterrows():
            if fix_in_ia_default(fixation, ia_row, prefix):
                if current_pass is None:  # first fixation in a new pass
                    current_pass = next_pass
                if current_pass == 2:
                    pass_duration += fixation.duration
            elif current_pass == 1:  # first fixation to exit the first pass
                current_pass = None
                next_pass += 1
            elif current_pass == 2:  # first fixation to exit the second pass
                break
        durations.append(
            {
                f"{prefix}_index": cidx,
                prefix: ia_row[f"{prefix}"],
                "second_pass_duration": pass_duration,
            }
        )

    return pd.DataFrame(durations)
217
+
218
+
219
def initial_landing_position_own(trial, dffix, prefix="word"):
    """
    Given an interest area and fixation sequence, return the initial landing
    position (expressed in character positions) on that interest area.
    Counting is from 1. If the interest area represents right-to-left text,
    the first character is the rightmost one. Returns `None` if no fixation
    landed on the interest area.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    if prefix == "word":
        # character boxes are needed to convert a pixel hit into a 1-based
        # within-word character index
        chars_df = pd.DataFrame(trial[f"chars_list"])
    else:
        chars_df = None
    results = []
    for cidx, ia_row in ia_df.iterrows():
        landing_position = None
        for idx, fixation in dffix.iterrows():
            if fix_in_ia_default(fixation, ia_row, prefix):
                if prefix == "char":
                    # a character area is a single position by definition
                    landing_position = 1
                else:
                    prefix_temp = "char"
                    # assumes each word's characters are fully contained in
                    # the word's bounding box — TODO confirm upstream layout
                    matched_chars_df = chars_df.loc[
                        (chars_df.char_xmin >= ia_row[f"{prefix}_xmin"])
                        & (chars_df.char_xmax <= ia_row[f"{prefix}_xmax"])
                        & (chars_df.char_ymin >= ia_row[f"{prefix}_ymin"])
                        & (chars_df.char_ymax <= ia_row[f"{prefix}_ymax"]),
                        :,
                    ]  # need to find way to count correct letter number
                    for char_idx, (rowidx, char_row) in enumerate(matched_chars_df.iterrows()):
                        if fix_in_ia_default(fixation, char_row, prefix_temp):
                            landing_position = char_idx + 1  # starts at 1
                            break
                    # only the first fixation landing in the area counts
                    break
        results.append(
            {
                f"{prefix}_index": cidx,
                prefix: ia_row[f"{prefix}"],
                "initial_landing_position": landing_position,
            }
        )
    return pd.DataFrame(results)
261
+
262
+
263
def initial_landing_distance_own(trial, dffix, prefix="word"):
    """
    Given an interest area and fixation sequence, return the initial landing
    distance on that interest area. The initial landing distance is the pixel
    distance between the first fixation to land in an interest area and the
    left edge of that interest area (or, in the case of right-to-left text,
    the right edge). Technically, the distance is measured from the text onset
    without including any padding. Returns `None` if no fixation landed on the
    interest area.

    Fix: removed the dead `if initial_distance is None` guard — the loop
    breaks on the first in-area fixation, so the guard was always true.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    distances = []
    for cidx, ia_row in ia_df.iterrows():
        initial_distance = None
        for idx, fixation in dffix.iterrows():
            if fix_in_ia_default(fixation, ia_row, prefix):
                initial_distance = abs(ia_row[f"{prefix}_xmin"] - fixation.x)
                break
        distances.append(
            {
                f"{prefix}_index": cidx,
                prefix: ia_row[f"{prefix}"],
                "initial_landing_distance": initial_distance,
            }
        )
    return pd.DataFrame(distances)
291
+
292
+
293
def landing_distances_own(trial, dffix, prefix="word"):
    """
    Given an interest area and fixation sequence, return a dataframe with the
    list of landing distances (pixels from the area's left edge, rounded to
    2 decimals) for every fixation inside each interest area.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    records = []
    for ia_idx, ia in ia_df.iterrows():
        in_area_distances = [
            round(abs(ia[f"{prefix}_xmin"] - fx.x), ndigits=2)
            for _, fx in dffix.iterrows()
            if fix_in_ia_default(fx, ia, prefix)
        ]
        records.append(
            {
                f"{prefix}_index": ia_idx,
                prefix: ia[f"{prefix}"],
                "landing_distances": in_area_distances,
            }
        )
    return pd.DataFrame(records)
308
+
309
+
310
def number_of_regressions_in_own(trial, dffix, prefix="word"):
    """
    Given an interest area and fixation sequence, return the number of
    regressions back to that interest area after the interest area was read
    for the first time. In other words, find the first fixation to exit the
    interest area and then count how many times the reader returns to the
    interest area from the right (or from the left in the case of
    right-to-left text).
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    counts = []
    for cidx, ia_row in ia_df.iterrows():
        entered_interest_area = False  # area seen at least once
        first_exit_index = None        # fixation index of the first exit
        count = 0
        prev_fixation = None
        regression_counted = False     # dedupe: count one regression per re-entry run

        for fixidx, (rowidx, fixation) in enumerate(dffix.iterrows()):
            # A regression is a return into the area, after the first exit,
            # arriving from the right (previous fixation further right).
            # prev_fixation is never None here: the condition requires both an
            # earlier entry and an earlier exit, so at least two prior iterations.
            if (
                entered_interest_area
                and first_exit_index is not None
                and fix_in_ia_default(fixation, ia_row, prefix)
                and not regression_counted
            ):
                if prev_fixation.x > fixation.x:
                    count += 1
                    regression_counted = True

            if fix_in_ia_default(fixation, ia_row, prefix):
                entered_interest_area = True
            elif entered_interest_area and first_exit_index is None:
                first_exit_index = fixidx
            else:
                # NOTE(review): regression_counted resets only on out-of-area
                # fixations after the first exit was already recorded — confirm
                # this is the intended per-run dedupe semantics.
                regression_counted = False
            prev_fixation = fixation

        counts.append(
            {
                f"{prefix}_index": cidx,
                prefix: ia_row[f"{prefix}"],
                "number_of_regressions_in": count,
            }
        )

    return pd.DataFrame(counts)
app.py ADDED
@@ -0,0 +1,1453 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ from PIL import Image
3
+ from io import StringIO
4
+ import streamlit as st
5
+ import pandas as pd
6
+ import numpy as np
7
+ import re
8
+ import time
9
+ import os
10
+
11
+ from matplotlib.font_manager import FontProperties
12
+ from matplotlib.patches import Rectangle
13
+ from matplotlib import pyplot as plt
14
+ import plotly.graph_objects as go
15
+ import plotly.express as px
16
+ import numpy as np
17
+ import pandas as pd
18
+ import pathlib as pl
19
+ import json
20
+ import logging
21
+ import zipfile
22
+ from stqdm import stqdm
23
+ import jellyfish as jf
24
+ import lovely_tensors
25
+ import shutil
26
+ import eyekit_measures as ekm
27
+ import zipfile
28
+
29
+ import utils as ut
30
+
31
+ os.environ["MPLCONFIGDIR"] = os.getcwd() + "/configs/"
32
+
33
+ st.set_page_config("Correction", page_icon=":eye:", layout="wide")
34
+
35
+ AVAILABLE_FONTS = st.session_state["AVAILABLE_FONTS"] = ut.AVAILABLE_FONTS
36
+
37
+ DEFAULT_PLOT_FONT = "DejaVu Sans Mono"
38
+ EXAMPLES_FOLDER = "./testfiles/"
39
+ EXAMPLES_ASC_ZIP_FILENAME = "asc_files.zip"
40
+ OSF_DOWNLAOD_LINK = "https://osf.io/download/us97f/"
41
+ EXAMPLES_FOLDER_PATH = pl.Path(EXAMPLES_FOLDER)
42
+
43
+
44
+ lovely_tensors.monkey_patch()
45
+
46
+
47
def make_folders(gradio_temp_folder, gradio_temp_unzipped_folder, gradio_plots):
    """Thin wrapper: create the app's working folders via ut.make_folders."""
    return ut.make_folders(gradio_temp_folder, gradio_temp_unzipped_folder, gradio_plots)
49
+
50
+
51
+ TEMP_FOLDER = st.session_state["TEMP_FOLDER"] = ut.TEMP_FOLDER
52
+ gradio_temp_unzipped_folder = st.session_state["gradio_temp_unzipped_folder"] = pl.Path("unzipped")
53
+
54
+ PLOTS_FOLDER = st.session_state["PLOTS_FOLDER"] = pl.Path("plots")
55
+ TEMP_FIGURE_STIMULUS_PATH = PLOTS_FOLDER.joinpath("temp_matplotlib_plot_stimulus.png")
56
+ make_folders(TEMP_FOLDER, gradio_temp_unzipped_folder, PLOTS_FOLDER)
57
+
58
+
59
@st.cache_data
def get_classic_cfg(fname):
    """Cached wrapper: load the classic-algorithm config JSON via ut.get_classic_cfg."""
    return ut.get_classic_cfg(fname)
62
+
63
+
64
+ classic_algos_cfg = st.session_state["classic_algos_cfg"] = get_classic_cfg("algo_cfgs_all.json")
65
+
66
+ DIST_MODELS_FOLDER = st.session_state["DIST_MODELS_FOLDER"] = pl.Path("models")
67
+ COLORS = st.session_state["COLORS"] = px.colors.qualitative.Alphabet
68
+ ALGO_CHOICES = st.session_state["ALGO_CHOICES"] = [
69
+ "warp",
70
+ "regress",
71
+ "compare",
72
+ "attach",
73
+ "segment",
74
+ "split",
75
+ "stretch",
76
+ "chain",
77
+ "slice",
78
+ "cluster",
79
+ "merge",
80
+ "Wisdom_of_Crowds",
81
+ "DIST",
82
+ "DIST-Ensemble",
83
+ "Wisdom_of_Crowds_with_DIST",
84
+ "Wisdom_of_Crowds_with_DIST_Ensemble",
85
+ ]
86
+
87
+
88
+ st.session_state["colnames_custom_csv_fix"] = {
89
+ "x_col_name_fix": "x",
90
+ "y_col_name_fix": "y",
91
+ "x_col_name_fix_stim": "char_x_center",
92
+ "x_start_col_name_fix_stim": "char_xmin",
93
+ "x_end_col_name_fix_stim": "char_xmax",
94
+ "y_col_name_fix_stim": "char_y_center",
95
+ "y_start_col_name_fix_stim": "char_ymin",
96
+ "y_end_col_name_fix_stim": "char_ymax",
97
+ "char_col_name_fix_stim": "char",
98
+ "trial_id_col_name_fix": "trial_id",
99
+ "trial_id_col_name_stim": "trial_id",
100
+ "subject_col_name_fix": "subid",
101
+ "subject_col_name_stim": "subid",
102
+ "line_num_col_name_stim": "assigned_line",
103
+ "time_start_col_name_fix": "start",
104
+ "time_stop_col_name_fix": "stop",
105
+ }
106
+
107
+ if "results" not in st.session_state:
108
+ st.session_state["results"] = {}
109
+
110
+
111
@st.cache_resource
def load_model(model_file, cfg):
    """Cached-resource wrapper: load a DIST model checkpoint via ut.load_model."""
    return ut.load_model(model_file, cfg)
114
+
115
+
116
@st.cache_resource
def find_and_load_model(model_date="20240104-223349"):
    """Cached-resource wrapper: locate and load the model for a given date stamp."""
    return ut.find_and_load_model(model_date)
119
+
120
+
121
def create_logger(name, level="DEBUG", file=None):
    """Create (or re-fetch) a named logger with a console handler and an
    optional file handler, without stacking duplicate handlers on repeated
    calls.

    Args:
        name: logger name passed to logging.getLogger.
        level: logging level name or number (default "DEBUG").
        file: optional path; when given, a FileHandler in "w" mode is added.

    Returns:
        The configured logging.Logger (propagation disabled).

    Fix: the identical Formatter was built twice — hoisted into one shared
    instance; the file handler no longer reuses the console handler's
    variable name.
    """
    formatter = logging.Formatter(
        "%(asctime)s.%(msecs)03d-%(name)s-p%(process)s-{%(pathname)s:%(lineno)d}-%(levelname)s >>> %(message)s",
        "%m-%d %H:%M:%S",
    )
    logger = logging.getLogger(name)
    logger.propagate = False
    logger.setLevel(level)
    # NOTE(review): FileHandler subclasses StreamHandler, so a pre-existing
    # file handler also satisfies this check and suppresses the console
    # handler — preserved from the original logic.
    if not any(isinstance(handler, logging.StreamHandler) for handler in logger.handlers):
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)
    if file is not None:
        if not any(isinstance(handler, logging.FileHandler) for handler in logger.handlers):
            file_handler = logging.FileHandler(file, "w")
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)
            logger.debug("Logger added")
    return logger
146
+
147
+
148
+ if "logger" not in st.session_state:
149
+ st.session_state["logger"] = create_logger(name="app", level="DEBUG", file="log_for_app.log")
150
+
151
+
152
@st.cache_data
def download_example_ascs(EXAMPLES_FOLDER, EXAMPLES_ASC_ZIP_FILENAME, OSF_DOWNLAOD_LINK, EXAMPLES_FOLDER_PATH):
    """Cached wrapper: fetch/unpack example .asc files via ut.download_example_ascs.

    NOTE(review): parameter name inherits the "DOWNLAOD" typo from the module
    constant; renaming would break keyword callers.
    """
    return ut.download_example_ascs(EXAMPLES_FOLDER, EXAMPLES_ASC_ZIP_FILENAME, OSF_DOWNLAOD_LINK, EXAMPLES_FOLDER_PATH)
155
+
156
+
157
# Example eye-tracking recordings, downloaded once (cached) at startup.
EXAMPLE_ASC_FILES = download_example_ascs(
    EXAMPLES_FOLDER, EXAMPLES_ASC_ZIP_FILENAME, OSF_DOWNLAOD_LINK, EXAMPLES_FOLDER_PATH
)
160
+
161
+
162
def asc_to_trial_ids(asc_file, close_gap_between_words=True):
    """Thin wrapper: split an .asc recording into trials via ut.asc_to_trial_ids."""
    return ut.asc_to_trial_ids(asc_file, close_gap_between_words)
164
+
165
+
166
@st.cache_data
def get_trials_list(asc_file=None, close_gap_between_words=True):
    """Cached wrapper around ut.get_trials_list."""
    return ut.get_trials_list(asc_file, close_gap_between_words)
169
+
170
+
171
@st.cache_data
def prep_data_for_dist(model_cfg, dffix, trial=None):
    """Cached wrapper: prepare fixation data for the DIST model via ut.prep_data_for_dist."""
    return ut.prep_data_for_dist(model_cfg, dffix, trial)
174
+
175
+
176
def save_trial_to_json(trial, savename):
    """Thin wrapper around ut.save_trial_to_json.

    NOTE(review): shadowed by a later redefinition of the same name in this
    module, so this binding is never active once the module finishes loading.
    """
    return ut.save_trial_to_json(trial, savename)
178
+
179
+
180
def export_csv(dffix, trial):
    """Thin wrapper: export corrected fixations as CSV via ut.export_csv."""
    return ut.export_csv(dffix, trial)
182
+
183
+
184
@st.cache_data
def get_DIST_preds(dffix, trial):
    """Cached wrapper: run the DIST model on the fixations via ut.get_DIST_preds."""
    return ut.get_DIST_preds(dffix, trial)
187
+
188
+
189
@st.cache_data
def get_EDIST_preds_with_model_check(dffix, trial, ensemble_model_avg=None):
    """Cached wrapper around ut.get_EDIST_preds_with_model_check (DIST ensemble)."""
    return ut.get_EDIST_preds_with_model_check(dffix, trial, ensemble_model_avg)
192
+
193
+
194
def get_all_classic_preds(dffix, trial):
    """Thin wrapper: run every classic correction algorithm via ut.get_all_classic_preds."""
    return ut.get_all_classic_preds(dffix, trial)
196
+
197
+
198
def apply_woc(dffix, trial, corrections, algo_choice):
    """Thin wrapper: apply the Wisdom-of-Crowds combination via ut.apply_woc."""
    return ut.apply_woc(dffix, trial, corrections, algo_choice)
200
+
201
+
202
@st.cache_data
def correct_df(
    dffix,
    algo_choice,
    trial=None,
    for_multi=False,
    ensemble_model_avg=None,
):
    """Cached wrapper around ut.correct_df, which applies the chosen
    correction algorithm(s) to the fixation DataFrame.

    NOTE(review): callers in this file disagree on the return shape —
    process_trial_choice unpacks a 2-tuple while process_trial keeps the raw
    value; confirm against ut.correct_df.
    """
    return ut.correct_df(
        dffix,
        algo_choice,
        trial,
        for_multi,
        ensemble_model_avg,
    )
217
+
218
+
219
@st.cache_data
def get_font_and_font_size_from_trial(trial):
    """Cached wrapper around ut.get_font_and_font_size_from_trial."""
    return ut.get_font_and_font_size_from_trial(trial)
222
+
223
+
224
@st.cache_data
def add_default_font_and_character_props_to_state(trial):
    """Cached wrapper around ut.add_default_font_and_character_props_to_state."""
    return ut.add_default_font_and_character_props_to_state(trial)
227
+
228
+
229
@st.cache_data
def get_plot_props(trial, available_fonts):
    """Cached wrapper around ut.get_plot_props (font, size, dpi, screen resolution)."""
    return ut.get_plot_props(trial, available_fonts)
232
+
233
+
234
def process_trial_choice(trial_id, algo_choice):
    """Resolve the selected trial from session state, derive its fixation
    DataFrame (parsing the .asc file if needed), store eyekit-related font
    metrics in session state, and run the chosen correction algorithm.

    Returns (dffix, trial, dpi, screen_res, font, font_size).
    """
    # Streamlit selectbox may hand back a {"value": ...} dict instead of the id.
    if isinstance(trial_id, dict):
        trial_id = trial_id["value"]
    trials_by_ids = st.session_state["trials_by_ids"]
    trial = trials_by_ids[trial_id]
    if "chars_list" in trial:
        (
            y_diff,
            x_txt_start,
            y_txt_start,
            font_face,
            _,
            line_height,
        ) = add_default_font_and_character_props_to_state(trial)
        font_size = ut.set_font_from_chars_list(trial)

        # Cache layout metrics for eyekit-based measures/plots.
        st.session_state["y_diff_for_eyekit"] = y_diff
        st.session_state["x_txt_start_for_eyekit"] = x_txt_start
        st.session_state["y_txt_start_for_eyekit"] = y_txt_start
        st.session_state["font_face_for_eyekit"] = font_face
        st.session_state["font_size_for_eyekit"] = font_size
        st.session_state["line_height_for_eyekit"] = line_height

    if "dffix" in trial:
        dffix = trial["dffix"]
    else:
        # No parsed fixations yet: derive them from the session's .asc file.
        asc_file = st.session_state["asc_file"]
        trial["plot_file"] = str(PLOTS_FOLDER.joinpath(f"{asc_file.stem}_{trial_id}_2ndInput_chars_channel_sep.png"))
        trial["fname"] = str(asc_file.name).split(".")[0]
        df, dffix, trial = ut.trial_to_dfs(trial, st.session_state["lines"], use_synctime=True)
        st.session_state["logger"].info(f"dffix.columns after trial_to_dfs {dffix.columns}")

    font, font_size, dpi, screen_res = ut.get_plot_props(trial, AVAILABLE_FONTS)
    st.session_state["trial"] = trial
    if "chars_list" in trial:
        chars_df = pd.DataFrame(trial["chars_list"])
        trial["chars_df"] = chars_df.to_dict()
        trial["y_char_unique"] = list(chars_df.char_y_center.sort_values().unique())
    # Correction needs stimulus geometry (characters or words) to snap to.
    if algo_choice is not None and ("chars_list" in trial or "words_list" in trial):
        dffix, _ = correct_df(dffix, algo_choice, trial)
    else:
        st.warning("🚨 Stimulus information needed for fixation correction 🚨")

    return dffix, trial, dpi, screen_res, font, font_size
278
+
279
+
280
@st.cache_data
def process_trial_choice_single_csv(trial, algo_choice, file=None):
    """Cached wrapper around ut.process_trial_choice_single_csv (custom-CSV path)."""
    return ut.process_trial_choice_single_csv(trial, algo_choice, file=file)
283
+
284
+
285
def quick_dffix_save(dffix, savename):
    """Write the fixation DataFrame to CSV at *savename* and log the path."""
    dffix.to_csv(savename)
    st.session_state["logger"].info(f"Saved processed data as {savename}")
288
+
289
+
290
def save_trial_to_json(trial, savename):
    """Serialize a trial dict to JSON at *savename*, excluding the "dffix"
    entry (a DataFrame, not JSON-serializable).

    NOTE(review): this redefines the ut-delegating wrapper of the same name
    earlier in this module; this version is the active binding.

    Fix: the original pop()-ed "dffix" out of the caller's dict, mutating the
    argument as a side effect of saving; serialize a filtered copy instead.
    """
    serializable = {key: value for key, value in trial.items() if key != "dffix"}
    with open(savename, "w", encoding="utf-8") as f:
        json.dump(serializable, f, ensure_ascii=False, indent=4, cls=ut.NumpyEncoder)
295
+
296
+
297
@st.cache_data
def process_trial(trial, asc_file_stem, lines, algo_choice, for_multi=False):
    """Prepare one trial for (batch) processing: attach plot/file metadata and
    plotting properties, parse fixations, cache character geometry, and apply
    the chosen correction algorithm.

    Returns (dffix, trial); dffix is an empty DataFrame when parsing yields
    no fixations.

    Fix: removed the duplicated `trial["y_char_unique"]` assignment.
    """
    trial_id = trial["trial_id"]
    trial["plot_file"] = str(PLOTS_FOLDER.joinpath(f"{asc_file_stem}_{trial_id}_2ndInput_chars_channel_sep.png"))
    trial["fname"] = str(asc_file_stem)
    font, font_size, dpi, screen_res = ut.get_plot_props(trial, AVAILABLE_FONTS)
    trial["font"] = font
    trial["font_size"] = font_size
    trial["dpi"] = dpi
    trial["screen_res"] = screen_res
    df, dffix, trial = ut.trial_to_dfs(trial, lines, use_synctime=True)
    if dffix.empty:
        return pd.DataFrame(), trial

    chars_df = pd.DataFrame(trial["chars_list"])
    trial["chars_df"] = chars_df.to_dict()
    trial["y_char_unique"] = list(chars_df.char_y_center.sort_values().unique())
    if algo_choice is not None:
        # NOTE(review): process_trial_choice unpacks correct_df as a 2-tuple
        # (dffix, _) while the raw return is kept here — confirm which shape
        # ut.correct_df produces for the batch path.
        dffix = correct_df(dffix, algo_choice, trial, for_multi)

    return dffix, trial
320
+
321
+
322
def add_text_to_ax(
    chars_list,
    ax,
    font_to_use="DejaVu Sans Mono",
    fontsize=21,
    prefix="char",
    plot_boxes=True,
    plot_text=True,
    box_annotations=None,
):
    """Thin wrapper: draw stimulus text and/or bounding boxes on a matplotlib
    axis via ut.add_text_to_ax."""
    return ut.add_text_to_ax(
        chars_list,
        ax,
        font_to_use=font_to_use,
        fontsize=fontsize,
        prefix=prefix,
        plot_boxes=plot_boxes,
        plot_text=plot_text,
        box_annotations=box_annotations,
    )
342
+
343
+
344
@st.cache_data
def matplotlib_plot_df(
    dffix,
    trial,
    algo_choice,
    stimulus_prefix="word",
    desired_dpi=300,
    fix_to_plot=None,
    stim_info_to_plot=None,
    box_annotations=None,
):
    """Cached wrapper around ut.matplotlib_plot_df (static stimulus/fixation plot).

    Fix: replaced the mutable default arguments (`[]` and a list literal)
    with None sentinels; the original defaults are reconstructed per call,
    so behavior is unchanged for existing callers.
    """
    fix_to_plot = [] if fix_to_plot is None else fix_to_plot
    stim_info_to_plot = ["Words", "Word boxes"] if stim_info_to_plot is None else stim_info_to_plot
    return ut.matplotlib_plot_df(
        dffix,
        trial,
        algo_choice,
        stimulus_prefix=stimulus_prefix,
        desired_dpi=desired_dpi,
        fix_to_plot=fix_to_plot,
        stim_info_to_plot=stim_info_to_plot,
        box_annotations=box_annotations,
    )
365
+
366
+
367
def sigmoid(x):
    """Logistic function: map x (scalar or array) into (0, 1)."""
    neg_exp = np.exp(-x)
    return 1 / (1 + neg_exp)
369
+
370
+
371
@st.cache_data
def plotly_plot_with_image(
    dffix,
    trial,
    algo_choice,
    to_plot_list=None,
    scale_factor=0.5,
):
    """Cached wrapper around ut.plotly_plot_with_image (interactive overlay plot).

    Fix: replaced the mutable list default argument with a None sentinel;
    the original default list is reconstructed per call, so behavior is
    unchanged for existing callers.
    """
    if to_plot_list is None:
        to_plot_list = ["Uncorrected Fixations", "Words", "corrected fixations", "Word boxes"]
    return ut.plotly_plot_with_image(
        dffix,
        trial,
        algo_choice,
        to_plot_list=to_plot_list,
        scale_factor=scale_factor,
    )
386
+
387
+
388
@st.cache_data
def plot_y_corr(dffix, algo_choice):
    """Cached wrapper around ut.plot_y_corr (plot of corrected y-coordinates)."""
    return ut.plot_y_corr(dffix, algo_choice)
391
+
392
+
393
def plotly_df(
    dffix=None, trial=None, algo_choice=None, to_plot_list=["fixations", "characters", "corrected fixations"], title=""
):
    """Build a plotly figure overlaying raw/corrected fixations and stimulus characters.

    Falls back to session state for ``dffix``/``algo_choice`` when not given.
    ``to_plot_list`` selects the layers to draw.

    NOTE(review): the default ``to_plot_list`` entries ("fixations",
    "characters", ...) do not match any of the capitalized keys actually
    checked below ("Uncorrected Fixations", "Corrected Fixations",
    "Characters", ...), so calling with the defaults yields an empty figure —
    confirm whether callers always pass an explicit list.
    NOTE(review): mutable list default is shared across calls (not mutated
    here, but worth confirming).
    """
    if dffix is None:
        dffix = st.session_state["dffix"]
    if algo_choice is None:
        algo_choice = st.session_state["algo_choice"]

    st.session_state["logger"].info(f"Plotting {to_plot_list}")

    # Index values are used as per-fixation text labels.
    num_datapoints = dffix.index
    # Resolve the character dataframe: from an explicit trial, from the
    # multi-file results keyed by `title`, or from the single-trial state.
    if trial is None:
        if title in st.session_state["results"]:
            chars_df = pd.DataFrame(st.session_state["results"][title]["trial"]["chars_list"])
        else:
            chars_df = pd.DataFrame(st.session_state["trial"]["chars_df"])
    else:
        chars_df = pd.DataFrame(trial["chars_list"]) if "chars_list" in trial else None
    if chars_df is not None:
        font_face, font_size = get_font_and_font_size_from_trial(trial)
        font_size = font_size * 0.65  # guess for scaling
        # Axis ranges from the stimulus extent when characters are known ...
        xmin = chars_df.char_x_center.min()
        xmax = chars_df.char_x_center.max()
        ymin = chars_df.char_y_center.min()
        ymax = chars_df.char_y_center.max()
    else:
        st.warning("No character or word information available to plot")
        # ... otherwise from the fixation extent.
        xmin = dffix.x.min()
        xmax = dffix.x.max()
        ymin = dffix.y.min()
        ymax = dffix.y.max()

    # y-axis range is inverted (top-left origin, screen coordinates).
    layout = dict(
        plot_bgcolor="white",
        autosize=True,
        margin=dict(t=1, l=10, r=10, b=1),
        xaxis=dict(
            title="x-coordinate",
            linecolor="black",
            range=[xmin - 100, xmax + 100],
            showgrid=False,
            mirror="all",
            showline=True,
        ),
        yaxis=dict(
            title="y-coordinate",
            range=[ymax + 100, ymin - 100],
            linecolor="black",
            showgrid=False,
            mirror="all",
            showline=True,
        ),
        legend=dict(orientation="h", yanchor="bottom", y=1.05, xanchor="right", x=0.8),
    )

    fig = go.Figure(layout=layout)

    if "Uncorrected Fixations" in to_plot_list:
        # Marker size encodes fixation duration (shifted/normalized by median).
        duration_scaled = dffix.duration - dffix.duration.min()
        duration = ((duration_scaled + 0.1) / duration_scaled.median()) * 5
        fig.add_trace(
            go.Scatter(
                x=dffix.x,
                y=dffix.y,
                mode="markers+lines+text",
                name="Raw fixations",
                marker=dict(
                    symbol="arrow",
                    size=duration.values,
                    angleref="previous",  # arrows point along the scanpath
                ),
                line_width=1.2,
                text=num_datapoints,
                textposition="middle right",
                textfont=dict(
                    family="sans serif",
                    size=9,
                ),
                hoverinfo="text+x+y",
                opacity=0.6,
            )
        )
    if "Corrected Fixations" in to_plot_list:
        # One trace per correction algorithm; each algorithm's corrected y
        # values live in a `y_<algo>` column.
        if isinstance(algo_choice, list):
            algo_choices = algo_choice
            repeats = range(len(algo_choice))
        else:
            algo_choices = [algo_choice]
            repeats = range(1)
        for algoIdx in repeats:
            algo_choice = algo_choices[algoIdx]
            if f"y_{algo_choice}" in dffix.columns:
                fig.add_trace(
                    go.Scatter(
                        x=dffix.x,
                        y=dffix.loc[:, f"y_{algo_choice}"],
                        mode="markers",
                        name=f"{algo_choice} corrected",
                        marker_color=st.session_state["COLORS"][algoIdx],
                        marker_size=5,
                        hoverinfo="text+x+y",
                        opacity=0.75,
                    )
                )
    if "Characters" in to_plot_list and chars_df is not None:
        # Render the stimulus text at the character centers; tiny markers so
        # only the glyphs are visible.
        fig.add_trace(
            go.Scatter(
                x=chars_df.char_x_center,
                y=chars_df.char_y_center,
                mode="markers+text",
                name="",
                showlegend=False,
                text=chars_df.char,
                textposition="middle center",
                marker=dict(color="black", size=0.1),
                textfont=dict(family=font_face, size=font_size, color="Black"),
            )
        )

    if "Character boxes (slow to plot)" in to_plot_list and chars_df is not None:
        # One rectangle shape per character bounding box (expensive for long texts).
        num = 0
        for k, row in stqdm(chars_df.iterrows(), "Adding boxes"):
            fig.add_shape(
                type="rect",
                x0=row.char_xmin,
                y0=row.char_ymin,
                x1=row.char_xmax,
                y1=row.char_ymax,
                line=dict(color=st.session_state["COLORS"][-1], width=1),
            )
            num += 1
    return fig
525
+
526
+
527
def save_to_zips(folder, pattern, savename):
    """Add every file in *folder* matching glob *pattern* to TEMP_FOLDER/savename.

    The archive is created when absent and appended to otherwise.

    Fix: the original reopened the ZipFile on every iteration and only switched
    the mode to "a" *after* the second iteration (``if idx == 1``), so when the
    archive did not already exist and two or more files matched, the second
    open with mode "w" truncated the archive and discarded the first entry.
    Opening the archive once with the correct mode writes all entries safely.
    """
    target = TEMP_FOLDER.joinpath(savename)
    mode = "a" if os.path.exists(target) else "w"
    with zipfile.ZipFile(target, mode=mode) as archive:
        for f in folder.glob(pattern):
            archive.write(f)
            st.session_state["logger"].info(f"Written {f} to zip {target}")
    st.session_state["logger"].info("Done zipping")
539
+
540
+
541
def process_multiple_asc(asc_files):
    """Run the correction pipeline over every trial in every uploaded .asc file.

    For each trial: correct fixations, store results in session state, save
    csv/json/png artifacts to TEMP_FOLDER, and bundle them per input file into
    a zip. Returns ``(zipfiles_with_results, results_keys)``.
    """
    algo_choice = st.session_state["algo_choice_multi"]
    if algo_choice is not None and "DIST" in algo_choice:
        # NOTE(review): the freshly loaded model/model_cfg are immediately
        # overwritten by the cached session-state copies on the next two
        # lines — the find_and_load_model call appears redundant; confirm.
        model, model_cfg = find_and_load_model(model_date=st.session_state["DIST_MODEL_DATE_WITH_NORM"])
        model = st.session_state["single_DIST_model"]
        model_cfg = st.session_state["single_DIST_model_cfg"]
        st.session_state["logger"].info(f"process_multiple_asc loaded model")
    else:
        model, model_cfg = None, None
    zipfiles_with_results = []
    st.session_state["logger"].info(f"found asc_files {asc_files}")

    for asc_file in stqdm(asc_files, desc="Processing asc files"):
        st.session_state["logger"].info(f"processing asc_file {asc_file}")
        asc_file_stem = pl.Path(asc_file.name).stem
        trials_by_ids, lines = asc_to_trial_ids(asc_file)
        for trial_id, trial in stqdm(trials_by_ids.items(), desc=f"\nProcessing trials in {asc_file_stem}"):
            # Positional True: presumably a "run correction" flag — confirm
            # against process_trial's signature.
            dffix, trial = process_trial(
                trial,
                asc_file_stem,
                lines,
                algo_choice,
                True,
            )

            st.session_state["logger"].debug(f"dffix.columns after process trial {dffix.columns}")
            if dffix.empty:
                st.session_state["logger"].warning(f"Dataframe for {trial_id} is empty, skipping")
                continue
            # Keyed by file stem + trial id so trials from different files
            # cannot collide.
            st.session_state["results"][f"{asc_file_stem}_{trial_id}"] = {
                "trial": trial,
                "dffix": dffix,
            }
            st.session_state["logger"].debug(f"Added {asc_file_stem}_{trial_id} to st.session_state")
            # Persist per-trial artifacts that get zipped below.
            quick_dffix_save(dffix, TEMP_FOLDER.joinpath(f"{asc_file_stem}_{trial_id}.csv"))
            save_trial_to_json(trial, TEMP_FOLDER.joinpath(f"{asc_file_stem}_{trial_id}.json"))
            ut.plot_fixations_and_text(
                dffix,
                trial,
                save=True,
                savelocation=TEMP_FOLDER.joinpath(f"{asc_file_stem}_{trial_id}.png"),
                algo_choice=algo_choice,
                turn_axis_on=False,
            )
        # Rebuild the per-file zip from scratch on every run.
        if os.path.exists(TEMP_FOLDER.joinpath(f"{asc_file_stem}.zip")):
            os.remove(TEMP_FOLDER.joinpath(f"{asc_file_stem}.zip"))
        save_to_zips(TEMP_FOLDER, f"{asc_file_stem}*.csv", f"{asc_file_stem}.zip")
        save_to_zips(TEMP_FOLDER, f"{asc_file_stem}*.json", f"{asc_file_stem}.zip")
        save_to_zips(TEMP_FOLDER, f"{asc_file_stem}*.png", f"{asc_file_stem}.zip")
        zipfiles_with_results += [str(x) for x in TEMP_FOLDER.glob(f"{asc_file_stem}*.zip")]
    results_keys = list(st.session_state["results"].keys())
    st.session_state["logger"].debug(f"results_keys are {results_keys}")
    st.session_state["trial_choices_multi"] = results_keys
    st.session_state["zipfiles_with_results"] = zipfiles_with_results
    return (zipfiles_with_results, results_keys)
596
+
597
+
598
@st.cache_data
def get_trials_and_lines_from_asc_files(asc_files):
    """Collect .asc files from uploads (including zip/tar archives), parse them
    into trials, stash the parsed data in session state, and kick off
    ``process_multiple_asc``.

    Archives are extracted into a per-file subfolder of
    ``gradio_temp_unzipped_folder`` and any .asc files found there are queued.
    """
    list_of_trial_lists = []
    list_of_lines = []
    total_num_trials = 0

    asc_files_to_do = []
    for filename_full in asc_files:
        # Uploaded file objects carry a .name; plain paths/strings are used as-is.
        if hasattr(filename_full, "name") and not isinstance(filename_full, pl.Path):
            file = filename_full.name
            st.session_state["logger"].info(f"Filename is {file}, filename_full is {filename_full}")
        else:
            file = filename_full
        if not isinstance(file, str):
            file_stem = pl.Path(file.name).stem
        else:
            file_stem = pl.Path(file).stem
        savefolder = gradio_temp_unzipped_folder.joinpath(file_stem)
        st.session_state["logger"].info(f"Operating on file {file}")
        if ".zip" in file:
            with zipfile.ZipFile(filename_full, "r") as z:
                z.extractall(str(savefolder))
        elif ".tar" in file:
            shutil.unpack_archive(file, savefolder, "tar")
        elif ".asc" in file:
            asc_files_to_do.append(filename_full)
        else:
            st.session_state["logger"].warning(f"Unsopported file format found in files")
        # NOTE(review): this glob runs for plain .asc uploads too (savefolder
        # then never existed/was never extracted into) — harmless if the glob
        # of a non-existent folder is empty, but confirm.
        newfiles = [str(x) for x in savefolder.glob(f"*.asc")]
        asc_files_to_do += newfiles
    st.session_state["logger"].info(f"asc_files_to_do is {asc_files_to_do}")

    for asc_file in asc_files_to_do:
        trials_by_ids, lines = asc_to_trial_ids(asc_file)
        total_num_trials += len(trials_by_ids)
        list_of_trial_lists.append(trials_by_ids)
        list_of_lines.append(lines)
    st.session_state["list_of_trial_lists"] = list_of_trial_lists
    st.session_state["list_of_lines"] = list_of_lines
    # NOTE(review): processing uses the session-state file list, not the
    # asc_files argument — confirm both always agree.
    process_multiple_asc(st.session_state["multi_asc_filelist"])
638
+
639
+
640
def process_trial_choice_and_update_df_multi():
    """Copy the currently selected multi-file trial's fixations and trial dict
    into the ``dffix_multi`` / ``trial_multi`` session-state slots, dropping the
    raw start/end time columns when present."""
    selected_id = st.session_state["trial_id_multi"]
    result_entry = st.session_state["results"][selected_id]
    fixations = result_entry["dffix"]
    if "start_time" in fixations.columns:
        fixations = fixations.drop(axis=1, labels=["start_time", "end_time"])
    st.session_state["dffix_multi"] = fixations
    st.session_state["trial_multi"] = result_entry["trial"]
647
+
648
+
649
@st.cache_data
def convert_df(df):
    """Serialize *df* to UTF-8 encoded CSV bytes without the index column."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode("utf-8")
652
+
653
+
654
def make_trial_from_stimulus_df(
    stim_plot_df,
    filename,
    trial_id,
):
    """Build a trial dict (character boxes, word boxes, line geometry) from a
    stimulus dataframe whose column names are taken from session state.

    Words are segmented while iterating characters: a word ends at a space or
    when the x coordinate jumps backwards (line wrap).
    """
    chars_list = []
    words_list = []
    word_start_idx = 0  # index into chars_list where the current word began
    for idx, row in stim_plot_df.reset_index().iterrows():
        # Normalize one stimulus row into the canonical char_* schema.
        char_dict = dict(
            char_xmin=row[st.session_state["x_start_col_name_fix_stim"]],
            char_xmax=row[st.session_state["x_end_col_name_fix_stim"]],
            char_ymin=row[st.session_state["y_start_col_name_fix_stim"]],
            char_ymax=row[st.session_state["y_end_col_name_fix_stim"]],
            char_x_center=row[st.session_state["x_col_name_fix_stim"]],
            char_y_center=row[st.session_state["y_col_name_fix_stim"]],
            char=row[st.session_state["char_col_name_fix_stim"]],
            assigned_line=int(row[st.session_state["line_num_col_name_stim"]]),
        )
        chars_list.append(char_dict)

        # Word boundary: a space, or the new char starts left of the previous
        # one (wrap to the next line).
        if len(chars_list) > 1 and (
            char_dict["char"] == " "
            or (len(chars_list) > 2 and (chars_list[-1]["char_xmin"] < chars_list[-2]["char_xmin"]))
        ):
            # The word spans chars_list[word_start_idx : -1] (the current char
            # is the delimiter / first char of the next word).
            word_dict = dict(
                word_xmin=chars_list[word_start_idx]["char_xmin"],
                word_xmax=chars_list[-2]["char_xmax"],
                word_ymin=chars_list[word_start_idx]["char_ymin"],
                word_ymax=chars_list[word_start_idx]["char_ymax"],
                word_x_center=(chars_list[-2]["char_xmax"] - chars_list[word_start_idx]["char_xmin"]) / 2
                + chars_list[word_start_idx]["char_xmin"],
                word_y_center=(chars_list[word_start_idx]["char_ymax"] - chars_list[word_start_idx]["char_ymin"]) / 2
                + chars_list[word_start_idx]["char_ymin"],
                word="".join([chars_list[idx]["char"] for idx in range(word_start_idx, len(chars_list) - 1)]),
            )

            # Next word starts at this char (line wrap) or after it (space).
            if char_dict["char"] != " ":
                word_start_idx = idx
            else:
                word_start_idx = idx + 1
            words_list.append(word_dict)

    # Per-character line geometry used to derive grid spacing.
    line_heights = [x["char_ymax"] - x["char_ymin"] for x in chars_list]
    line_xcoords_all = [x["char_x_center"] for x in chars_list]
    line_xcoords_no_pad = np.unique(line_xcoords_all)

    line_ycoords_all = [x["char_y_center"] for x in chars_list]
    line_ycoords_no_pad = np.unique(line_ycoords_all)

    trial = dict(
        filename=filename,
        y_midline=[float(x) for x in list(stim_plot_df[st.session_state["y_col_name_fix_stim"]].unique())],
        num_char_lines=len(stim_plot_df[st.session_state["y_col_name_fix_stim"]].unique()),
        y_diff=[
            float(x) for x in list(np.unique(np.diff(stim_plot_df[st.session_state["y_start_col_name_fix_stim"]])))
        ],
        trial_id=trial_id,
        chars_list=chars_list,
        words_list=words_list,
        trial_is="paragraph",
        text="".join([x["char"] for x in chars_list]),
    )

    trial["x_char_unique"] = [float(x) for x in list(line_xcoords_no_pad)]
    trial["y_char_unique"] = list(map(float, list(line_ycoords_no_pad)))
    # Scalar grid spacings overwrite the list-valued y_diff set above.
    x_diff, y_diff = ut.calc_xdiff_ydiff(
        line_xcoords_no_pad, line_ycoords_no_pad, line_heights, allow_multiple_values=False
    )
    trial["x_diff"] = float(x_diff)
    trial["y_diff"] = float(y_diff)
    trial["num_char_lines"] = len(line_ycoords_no_pad)
    trial["line_heights"] = list(map(float, line_heights))
    # NOTE(review): chars_list is already set in the dict literal above; this
    # reassignment is redundant.
    trial["chars_list"] = chars_list

    return trial
730
+
731
+
732
@st.cache_data
def get_fixations_file_trials_list(fixations_df, stimulus):
    """Normalize custom-csv fixation and stimulus data into per-trial dicts.

    Renames user-configured column names (from session state) to the canonical
    schema, splits the fixations by trial id, and pairs each group with either
    a per-trial stimulus dataframe or a shared stimulus dict.

    Returns ``(trials_by_ids, trial_keys)``.

    NOTE(review): ``trial_keys`` is only assigned on the path where a trial-id
    column exists — the fallback branch that assigns "trial_0" never sets it,
    so the final ``return`` would raise NameError there; confirm and fix.
    """
    if isinstance(stimulus, pd.DataFrame):
        # Zero-base the line numbers.
        stimulus[st.session_state["line_num_col_name_stim"]] -= stimulus[
            st.session_state["line_num_col_name_stim"]
        ].min()
        stimulus.rename(
            {
                st.session_state["x_col_name_fix_stim"]: "char_x_center",
                st.session_state["x_start_col_name_fix_stim"]: "char_xmin",
                st.session_state["x_end_col_name_fix_stim"]: "char_xmax",
                st.session_state["y_col_name_fix_stim"]: "char_y_center",
                st.session_state["y_start_col_name_fix_stim"]: "char_ymin",
                st.session_state["y_end_col_name_fix_stim"]: "char_ymax",
                st.session_state["char_col_name_fix_stim"]: "char",
                st.session_state["trial_id_col_name_stim"]: "trial_id",
            },
            axis=1,
            inplace=True,
        )

    fixations_df.rename(
        mapper={
            st.session_state["x_col_name_fix"]: "x",
            st.session_state["y_col_name_fix"]: "y",
            st.session_state["time_start_col_name_fix"]: "corrected_start_time",
            st.session_state["time_stop_col_name_fix"]: "corrected_end_time",
            st.session_state["trial_id_col_name_fix"]: "trial_id",
        },
        axis=1,
        inplace=True,
    )

    fixations_df["duration"] = fixations_df.corrected_end_time - fixations_df.corrected_start_time
    if "trial_id" in stimulus:
        fixations_df["trial_id"] = stimulus["trial_id"]
    if "trial_id" in fixations_df:
        if st.session_state["has_multiple_subject"]:
            # Disambiguate trials across subjects: "<subject>_<trial>".
            fixations_df["trial_id"] = [
                f"{id}_{num}"
                for id, num in zip(
                    fixations_df[st.session_state["subject_col_name_fix"]],
                    fixations_df[st.session_state["trial_id_col_name_fix"]],
                )
            ]
            # NOTE(review): this indexes by the *original* trial-id column
            # name, which was renamed to "trial_id" above — works only when
            # they are equal; confirm.
            trial_keys = list(fixations_df[st.session_state["trial_id_col_name_fix"]].unique())
            st.session_state["logger"].info(f"Found keys {trial_keys} for {st.session_state['single_csv_file'].name}")
    else:
        st.session_state["logger"].warning(f"trial id column not found assigning trial id trial_0.")
        st.warning(f"trial id column not found assigning trial id trial_0.")
        fixations_df["trial_id"] = "trial_0"
    st.session_state["fixations_df"] = fixations_df
    trials_by_ids = {}

    for trial_id, subdf in fixations_df.groupby("trial_id"):
        if isinstance(stimulus, pd.DataFrame):
            # Per-trial stimulus: build a fresh trial dict from its rows.
            stim_df = stimulus[stimulus.trial_id == trial_id]

            stim_df = stim_df.dropna(axis=0, how="any")
            subdf = subdf.dropna(axis=0, how="any")
            subdf = subdf.reset_index(drop=True)
            stim_df = stim_df.reset_index(drop=True)
            assert not stim_df.empty, "stimulus df is empty"
            trial = make_trial_from_stimulus_df(
                stim_df,
                st.session_state["single_csv_file_stim"].name,
                trial_id,
            )
        else:
            # Shared stimulus dict (json upload): reused/mutated for every
            # trial — NOTE(review): all trials then share one dict object.
            trial = stimulus
        trial["dffix"] = subdf
        trial["fname"] = f"{trial_id}"
        trial["plot_file"] = str(
            st.session_state["PLOTS_FOLDER"].joinpath(f"{trial_id}_2ndInput_chars_channel_sep.png")
        )
        trials_by_ids[trial_id] = trial

    return trials_by_ids, trial_keys
810
+
811
+
812
def try_reading_csv(file):
    """Parse an uploaded file as comma-separated, falling back to tab-separated.

    Returns the DataFrame when it parses into more than one column, otherwise
    None. Parse failures are logged rather than raised, so callers must handle
    a None result.

    Fixes: deprecated ``logger.warn`` replaced with ``logger.warning``; the
    bytes are decoded once instead of twice; the unused ``colname_mapping``
    local was removed.
    """
    decoded = file.getvalue().decode("utf-8")
    try:
        df = pd.read_csv(StringIO(decoded))
        st.session_state["logger"].info(f"\n{df.head()}")
        # A single-column parse means the delimiter guess was wrong.
        assert len(df.columns) > 1
        return df
    except Exception as e:
        st.session_state["logger"].warning(e)
    try:
        df = pd.read_csv(StringIO(decoded), delimiter="\t")
        assert len(df.columns) > 1
        return df
    except Exception as e:
        st.session_state["logger"].warning(e)
        return None
831
+
832
+
833
@st.cache_data
def guess_col_names_fix(file=None):
    """Read the uploaded fixation file into a DataFrame, guessing the delimiter.

    Falls back to ``st.session_state["single_csv_file"]`` when *file* is None.
    Returns the parsed DataFrame, or None when no file is available or parsing
    fails.

    Fix: the original dereferenced ``df.shape`` without checking that
    ``try_reading_csv`` succeeded, raising AttributeError on a None result;
    a guard was added.
    """
    if file is None:
        file = st.session_state["single_csv_file"]
    if file is None:
        return None

    # Inspect the header line to guess the delimiter.
    first_line = next(iter(StringIO(file.getvalue().decode("utf-8"))))
    res = re.findall(r"[^()0-9-]+", first_line)
    for delim in [",", "\t", ";"]:
        first_line = first_line.split(delim)
        if len(first_line) > 2:
            break
        else:
            first_line = first_line[0]
    # Levenshtein scores of expected vs. actual header names.
    # NOTE(review): the scoring result is computed but discarded — confirm
    # whether automatic column matching was meant to use it.
    scores_lists = {}
    for k, v in st.session_state["colnames_custom_csv_fix"].items():
        scores_lists[v] = []
        for word in first_line:
            scores_lists[v].append(jf.levenshtein_distance(v, word))
    scores_df = pd.DataFrame(scores_lists)
    scores_df.idxmin(axis=0)
    df = try_reading_csv(file)
    if df is not None and df.shape[1] > 1:
        return df
    return None
860
+
861
+
862
@st.cache_data
def guess_col_names_stim(file=None):
    """Parse the uploaded stimulus file: a .json trial dict or a delimited table.

    Falls back to ``st.session_state["single_csv_file_stim"]`` when *file* is
    None. Returns the trial dict (json), the DataFrame (csv/tsv), or None.

    Fix: the original dereferenced ``df.shape`` without checking that
    ``try_reading_csv`` succeeded, raising AttributeError on a None result;
    a guard was added.
    """
    if file is None:
        file = st.session_state["single_csv_file_stim"]
    if file is None:
        return None
    if ".json" in file.name:
        json_string = file.getvalue().decode("utf-8")
        trial = json.loads(json_string)
        return trial
    df = try_reading_csv(file)
    if df is not None and df.shape[1] > 1:
        return df
    return None
879
+
880
+
881
@st.cache_resource
def set_up_models(dist_models_folder):
    """Cached resource wrapper: load the DIST models via ``ut.set_up_models``."""
    models_out = ut.set_up_models(dist_models_folder)
    return models_out
884
+
885
@st.cache_data
def get_eyekit_measures(_txt, _seq, get_char_measures=False):
    """Cached pass-through to ``ekm.get_eyekit_measures`` (leading underscores
    keep the unhashable eyekit objects out of Streamlit's cache key)."""
    measures = ekm.get_eyekit_measures(_txt, _seq, get_char_measures=get_char_measures)
    return measures
888
+
889
+
890
@st.cache_data
def get_all_measures(trial, dffix, prefix, use_corrected_fixations=True, correction_algo="warp"):
    """Cached pass-through to ``ut.get_all_measures`` for reading-measure computation."""
    measures = ut.get_all_measures(
        trial,
        dffix,
        prefix,
        use_corrected_fixations=use_corrected_fixations,
        correction_algo=correction_algo,
    )
    return measures
893
+
894
+
895
# Guard: the shared session state (e.g. ALGO_CHOICES) must already be
# initialized by the entry page before this script runs.
assert "ALGO_CHOICES" in st.session_state, f"st.session_state not initialized\n{list(st.session_state.keys())}"

# Load the DIST correction models (cached via st.cache_resource) and merge
# them into the session state.
set_up_models_out = set_up_models(DIST_MODELS_FOLDER)
st.session_state.update(set_up_models_out)


# Page header and navigation tabs.
st.title("Fixation data vertical alignment")
st.header("👀 Read asc file or files and plot fixations 👀")
st.markdown("[Contact Us](mailto:[email protected])")
st.markdown("[Read about DIST model](https://arxiv.org/abs/2311.06095)")

single_file_tab, multi_file_tab = st.tabs(["Single File 📁", "Multiple Files 📁 📁"])

single_file_tab_asc_tab, single_file_tab_csv_tab = single_file_tab.tabs([".asc files", "custom files"])

single_file_tab_asc_tab.subheader(
    "Upload an .asc file and select a trial. Then select a correction algorithm and plot/download the results"
)
915
def change_which_file_is_used_and_clear_results():
    """Form callback: clear any previously corrected trial from session state,
    then point the single-file asc pipeline at either the selected example
    file or the user's uploaded file."""
    for stale_key in ("dffix", "trial"):
        if stale_key in st.session_state:
            del st.session_state[stale_key]
    use_example = (
        st.session_state["single_file_tab_asc_tab_example_use_example_or_uploaded_file_choice"] == "Example File"
    )
    if use_example:
        st.session_state["single_asc_file_asc"] = st.session_state["single_file_tab_asc_tab_example_file_choice"]
    else:
        st.session_state["single_asc_file_asc"] = st.session_state["single_asc_uploaded_file"]
924
+
925
+
926
# File-selection form for the single .asc workflow: upload widget, word-box
# option, optional example-file picker, and a submit button whose callback
# (change_which_file_is_used_and_clear_results) commits the choice.
with single_file_tab_asc_tab.form("single_file_tab_asc_tab_load_example_form"):
    single_asc_file_asc_uploaded = st.file_uploader(
        "Select .asc File", accept_multiple_files=False, key="single_asc_uploaded_file", type=["asc"]
    )
    close_gap_between_words_single_asc = st.checkbox(
        label="Should spaces between words be included in word bounding box?",
        value=False,
        key="close_gap_between_words_single_asc",
    )

    # Example-file options are only shown when the bundled examples exist.
    # NOTE(review): the submit callback reads the radio's session key, which
    # is never created when the examples are missing — confirm that path.
    if os.path.isfile(EXAMPLE_ASC_FILES[0]):
        example_file_choice = st.selectbox(
            "Select example file", options=EXAMPLE_ASC_FILES, key="single_file_tab_asc_tab_example_file_choice"
        )
        use_example_or_uploaded_file_choice = st.radio(
            "Should the uploaded file be used or the selected example file?",
            index=1,
            options=["Uploaded File", "Example File"],
            key="single_file_tab_asc_tab_example_use_example_or_uploaded_file_choice",
        )

    upload_file_button = st.form_submit_button(
        label="Load selected data.", on_click=change_which_file_is_used_and_clear_results
    )
950
+
951
+
952
+ if "single_asc_file_asc" in st.session_state and st.session_state["single_asc_file_asc"] is not None:
953
+ trial_choices_single_asc, trials_by_ids, lines, asc_file = get_trials_list(
954
+ st.session_state["single_asc_file_asc"], close_gap_between_words=close_gap_between_words_single_asc
955
+ )
956
+ st.session_state["trials_by_ids"] = trials_by_ids
957
+ st.session_state["trial_choices"] = trial_choices_single_asc
958
+ st.session_state["lines"] = lines
959
+ st.session_state["asc_file"] = asc_file
960
+ if trial_choices_single_asc:
961
+ with single_file_tab_asc_tab.form(key="single_file_tab_asc_tab_trial_select_form"):
962
+ col_a1, col_a2 = st.columns((1, 2))
963
+ with col_a1:
964
+ trial_choice = st.selectbox(
965
+ "Which trial should be corrected?",
966
+ trial_choices_single_asc,
967
+ key="trial_id",
968
+ index=0,
969
+ )
970
+ with col_a2:
971
+ st.multiselect(
972
+ "Choose correction algorithm",
973
+ ALGO_CHOICES,
974
+ key="algo_choice",
975
+ default=[ALGO_CHOICES[0]],
976
+ )
977
+ process_trial_btn = st.form_submit_button("Load and correct trial")
978
+
979
+ if process_trial_btn:
980
+ single_file_tab_asc_tab.write(f'You selected: {st.session_state["trial_id"]}')
981
+ dffix, trial, dpi, screen_res, font, font_size = process_trial_choice(
982
+ trial_choice, st.session_state["algo_choice"]
983
+ )
984
+
985
+ st.session_state["dffix"] = dffix
986
+ st.session_state["trial"] = trial
987
+ st.session_state["dpi"] = dpi
988
+ st.session_state["screen_res"] = screen_res
989
+ st.session_state["font"] = font
990
+ st.session_state["font_size"] = font_size
991
+
992
+ export_csv(dffix, trial)
993
+
994
+ if "dffix" in st.session_state and "trial" in st.session_state:
995
+ df_expander_single = single_file_tab_asc_tab.expander("Show Dataframe", False)
996
+ plot_expander_single = single_file_tab_asc_tab.expander("Show Plots", True)
997
+ df_expander_single.dataframe(st.session_state["dffix"])
998
+
999
+ csv = convert_df(st.session_state["dffix"])
1000
+
1001
+ df_expander_single.download_button(
1002
+ "Download fixation dataframe",
1003
+ csv,
1004
+ f'{st.session_state["trial_id"]}.csv',
1005
+ "text/csv",
1006
+ key="download-csv-single",
1007
+ )
1008
+
1009
+ plotting_checkboxes_single = plot_expander_single.multiselect(
1010
+ "Select what gets plotted",
1011
+ ["Uncorrected Fixations", "Corrected Fixations", "Words", "Word boxes"],
1012
+ key="plotting_checkboxes_single",
1013
+ default=["Uncorrected Fixations", "Corrected Fixations", "Words", "Word boxes"],
1014
+ )
1015
+ scale_factor_single_asc = plot_expander_single.number_input(
1016
+ label="Scale factor for stimulus image", min_value=0.01, max_value=3.0, value=0.5, step=0.1
1017
+ )
1018
+ plot_expander_single.plotly_chart(
1019
+ plotly_plot_with_image(
1020
+ st.session_state["dffix"],
1021
+ st.session_state["trial"],
1022
+ to_plot_list=plotting_checkboxes_single,
1023
+ algo_choice=st.session_state["algo_choice"],
1024
+ scale_factor=scale_factor_single_asc,
1025
+ ),
1026
+ use_container_width=False,
1027
+ )
1028
+ plot_expander_single.plotly_chart(
1029
+ plot_y_corr(st.session_state["dffix"], st.session_state["algo_choice"]), use_container_width=True
1030
+ )
1031
+
1032
+ if "chars_list" in st.session_state["trial"]:
1033
+ analysis_expander_single_asc = single_file_tab_asc_tab.expander("Show Analysis results", True)
1034
+ use_corrected_fixations_tickbox = analysis_expander_single_asc.checkbox(
1035
+ "Use corrected",
1036
+ True,
1037
+ "use_corrected_fixations_tickbox",
1038
+ help="Whether to use the corrected or uncorrected fixations for the analysis.",
1039
+ )
1040
+ eyekit_tab, own_analysis_tab = analysis_expander_single_asc.tabs(
1041
+ ["Analysis using eyekit", "Analysis without eyekit"]
1042
+ )
1043
+ with eyekit_tab:
1044
+ st.markdown("Analysis powered by [eyekit](https://jwcarr.github.io/eyekit/)")
1045
+ st.markdown(
1046
+ "Please adjust parameters below to align fixations with stimulus using the sliders.Eyekit analysis is based on this alignment."
1047
+ )
1048
+ a_c1, a_c2, a_c3, a_c4, a_c5, a_c6 = st.columns(6)
1049
+ if "Consolas" in AVAILABLE_FONTS:
1050
+ font_index = AVAILABLE_FONTS.index("Consolas")
1051
+ elif "Courier New" in AVAILABLE_FONTS:
1052
+ font_index = AVAILABLE_FONTS.index("Courier New")
1053
+ elif "DejaVu Sans Mono" in AVAILABLE_FONTS:
1054
+ font_index = AVAILABLE_FONTS.index("DejaVu Sans Mono")
1055
+ else:
1056
+ font_index = 0
1057
+ font_face = a_c1.selectbox(
1058
+ label="Select Font",
1059
+ options=AVAILABLE_FONTS,
1060
+ index=font_index,
1061
+ key="font_face_for_eyekit_single_asc",
1062
+ )
1063
+ algo_choice_single_asc_eyekit = a_c1.selectbox(
1064
+ "Algorithm", st.session_state["algo_choice"], index=0, key="algo_choice_single_asc_eyekit"
1065
+ )
1066
+ sliders_on_tickbox = a_c6.checkbox(
1067
+ "Sliders", True, "single_asc_eyekit_sliders_checkbox", help="Turns sliders on and off"
1068
+ )
1069
+
1070
+ if "font_size_for_eyekit" not in st.session_state:
1071
+ (
1072
+ y_diff,
1073
+ x_txt_start,
1074
+ y_txt_start,
1075
+ _,
1076
+ _,
1077
+ line_height,
1078
+ ) = add_default_font_and_character_props_to_state(st.session_state["trial"])
1079
+ font_size = ut.set_font_from_chars_list(st.session_state["trial"])
1080
+ st.session_state["y_diff_for_eyekit"] = y_diff
1081
+ st.session_state["x_txt_start_for_eyekit"] = x_txt_start
1082
+ st.session_state["y_txt_start_for_eyekit"] = y_txt_start
1083
+ st.session_state["font_face_for_eyekit"] = font_face
1084
+ st.session_state["font_size_for_eyekit"] = font_size
1085
+ st.session_state["line_height_for_eyekit"] = line_height
1086
+ if sliders_on_tickbox:
1087
+ font_size = a_c2.select_slider(
1088
+ "Font Size",
1089
+ np.arange(5, 36, 0.25),
1090
+ st.session_state["font_size_for_eyekit"],
1091
+ key="font_size_for_eyekit_single_asc",
1092
+ )
1093
+ x_txt_start = a_c3.select_slider(
1094
+ "x",
1095
+ np.arange(300, 601, 1),
1096
+ round(st.session_state["x_txt_start_for_eyekit"]),
1097
+ key="x_txt_start_for_eyekit_single_asc",
1098
+ help="x coordinate of first character",
1099
+ )
1100
+ y_txt_start = a_c4.select_slider(
1101
+ "y",
1102
+ np.arange(100, 501, 1),
1103
+ round(st.session_state["y_txt_start_for_eyekit"]),
1104
+ key="y_txt_start_for_eyekit_single_asc",
1105
+ help="y coordinate of first character",
1106
+ )
1107
+ line_height = a_c5.select_slider(
1108
+ "Line height",
1109
+ np.arange(0, 151, 1),
1110
+ round(st.session_state["line_height_for_eyekit"]),
1111
+ key="line_height_for_eyekit_single_asc",
1112
+ )
1113
+ else:
1114
+ font_size = a_c2.number_input(
1115
+ "Font Size",
1116
+ None,
1117
+ None,
1118
+ round(st.session_state["font_size_for_eyekit"], ndigits=0),
1119
+ key="font_size_for_eyekit_single_asc",
1120
+ )
1121
+ x_txt_start = a_c3.number_input(
1122
+ "x",
1123
+ None,
1124
+ None,
1125
+ round(st.session_state["x_txt_start_for_eyekit"]),
1126
+ key="x_txt_start_for_eyekit_single_asc",
1127
+ help="x coordinate of first character",
1128
+ )
1129
+ y_txt_start = a_c4.number_input(
1130
+ "y",
1131
+ None,
1132
+ None,
1133
+ round(st.session_state["y_txt_start_for_eyekit"]),
1134
+ key="y_txt_start_for_eyekit_single_asc",
1135
+ help="y coordinate of first character",
1136
+ )
1137
+ line_height = a_c5.number_input(
1138
+ "Line height",
1139
+ None,
1140
+ None,
1141
+ round(st.session_state["line_height_for_eyekit"]),
1142
+ key="line_height_for_eyekit_single_asc",
1143
+ )
1144
+
1145
+ fixation_sequence, textblock, screen_size = ekm.get_fix_seq_and_text_block(
1146
+ st.session_state["dffix"],
1147
+ st.session_state["trial"],
1148
+ x_txt_start=st.session_state["x_txt_start_for_eyekit_single_asc"],
1149
+ y_txt_start=st.session_state["y_txt_start_for_eyekit_single_asc"],
1150
+ font_face=st.session_state["font_face_for_eyekit_single_asc"],
1151
+ font_size=st.session_state["font_size_for_eyekit_single_asc"],
1152
+ line_height=line_height,
1153
+ use_corrected_fixations=st.session_state["use_corrected_fixations_tickbox"],
1154
+ correction_algo=st.session_state["algo_choice_single_asc_eyekit"],
1155
+ )
1156
+ eyekitplot_img = ekm.eyekit_plot(textblock, fixation_sequence, screen_size)
1157
+ st.image(eyekitplot_img, "Fixations and stimulus as used for anaylsis")
1158
+
1159
+ with open(
1160
+ f'results/fixation_sequence_eyekit_{st.session_state["trial"]["trial_id"]}.json', "r"
1161
+ ) as f:
1162
+ fixation_sequence_json = json.load(f)
1163
+ fixation_sequence_json_str = json.dumps(fixation_sequence_json)
1164
+
1165
+ st.download_button(
1166
+ "Download fixations in eyekits format",
1167
+ fixation_sequence_json_str,
1168
+ f'fixation_sequence_eyekit_{st.session_state["trial"]["trial_id"]}.json',
1169
+ "json",
1170
+ key="download_eyekit_fix_json_single_asc",
1171
+ )
1172
+
1173
+ with open(f'results/textblock_eyekit_{st.session_state["trial"]["trial_id"]}.json', "r") as f:
1174
+ textblock_json = json.load(f)
1175
+ textblock_json_str = json.dumps(textblock_json)
1176
+
1177
+ st.download_button(
1178
+ "Download stimulus in eyekits format",
1179
+ textblock_json_str,
1180
+ f'textblock_eyekit_{st.session_state["trial"]["trial_id"]}.json',
1181
+ "json",
1182
+ key="download_eyekit_text_json_single_asc",
1183
+ )
1184
+
1185
+ word_measures_df, character_measures_df = get_eyekit_measures(
1186
+ textblock, fixation_sequence, get_char_measures=False
1187
+ )
1188
+
1189
+ st.dataframe(word_measures_df, use_container_width=True, hide_index=True)
1190
+ word_measures_df_csv = convert_df(word_measures_df)
1191
+
1192
+ word_measures_df_download_btn = st.download_button(
1193
+ "Download word measures data",
1194
+ word_measures_df_csv,
1195
+ f'{st.session_state["trial"]["trial_id"]}_word_measures_df.csv',
1196
+ "text/csv",
1197
+ key="word_measures_df_download_btn",
1198
+ )
1199
+ measure_words = st.selectbox(
1200
+ "Select measure to visualize", list(ekm.MEASURES_DICT.keys()), key="measure_words"
1201
+ )
1202
+ st.image(ekm.plot_with_measure(textblock, fixation_sequence, screen_size, measure_words))
1203
+ with own_analysis_tab:
1204
+ st.markdown(
1205
+ "This analysis method does not require manual alignment and works when the automated stimulus coordinates are correct."
1206
+ )
1207
+ own_word_measures = get_all_measures(
1208
+ st.session_state["trial"],
1209
+ st.session_state["dffix"],
1210
+ prefix="word",
1211
+ use_corrected_fixations=st.session_state["use_corrected_fixations_tickbox"],
1212
+ correction_algo=st.session_state["algo_choice_single_asc_eyekit"],
1213
+ )
1214
+ st.dataframe(own_word_measures, use_container_width=True, hide_index=True)
1215
+ own_word_measures_csv = convert_df(own_word_measures)
1216
+
1217
+ word_measures_df_download_btn = st.download_button(
1218
+ "Download word measures data",
1219
+ own_word_measures_csv,
1220
+ f'{st.session_state["trial"]["trial_id"]}_own_word_measures_df.csv',
1221
+ "text/csv",
1222
+ key="own_word_measures_df_download_btn",
1223
+ )
1224
+ fix_to_plot = (
1225
+ ["Corrected Fixations"]
1226
+ if st.session_state["use_corrected_fixations_tickbox"]
1227
+ else ["Uncorrected Fixations"]
1228
+ )
1229
+ own_word_measures_fig, desired_width_in_pixels, desired_height_in_pixels = matplotlib_plot_df(
1230
+ st.session_state["dffix"],
1231
+ st.session_state["trial"],
1232
+ st.session_state["algo_choice"],
1233
+ stimulus_prefix="word",
1234
+ box_annotations=own_word_measures[measure_words],
1235
+ fix_to_plot=fix_to_plot,
1236
+ )
1237
+ st.pyplot(own_word_measures_fig)
1238
+ else:
1239
+ single_file_tab_asc_tab.warning("🚨 Stimulus information needed for analysis 🚨")
1240
+
1241
+ single_file_tab_csv_tab.markdown(
1242
+ "#### Upload one .csv file for the fixations and one .json or .csv file for the stimulus information and select a trial. Then select a correction algorithm and plot/download the results"
1243
+ )
1244
+
1245
+ with single_file_tab_csv_tab.expander("Upload and preview data", expanded=True):
1246
+ csv_upl_col1, csv_upl_col2 = st.columns(2)
1247
+ single_csv_file = csv_upl_col1.file_uploader(
1248
+ "Select .csv file containing the fixation data",
1249
+ accept_multiple_files=False,
1250
+ key="single_csv_file",
1251
+ type={"csv", "txt", "dat"},
1252
+ )
1253
+ single_csv_stim_file = csv_upl_col2.file_uploader(
1254
+ "Select .csv or .json file containing the stimulus data",
1255
+ accept_multiple_files=False,
1256
+ key="single_csv_file_stim",
1257
+ type={"json", "csv", "txt", "dat"},
1258
+ )
1259
+
1260
+ if single_csv_file:
1261
+ st.session_state["dffix_single_csv"] = guess_col_names_fix(single_csv_file)
1262
+ if st.session_state["dffix_single_csv"] is not None:
1263
+ csv_upl_col1.dataframe(
1264
+ st.session_state["dffix_single_csv"], use_container_width=True, hide_index=True, height=200
1265
+ )
1266
+ if single_csv_stim_file:
1267
+ st.session_state["stimdf_single_csv"] = guess_col_names_stim(single_csv_stim_file)
1268
+ if ".json" in single_csv_stim_file.name:
1269
+ st.session_state["colnames_stim"] = st.session_state["stimdf_single_csv"].keys()
1270
+ else:
1271
+ st.session_state["colnames_stim"] = st.session_state["stimdf_single_csv"].columns
1272
+ if st.session_state["stimdf_single_csv"] is not None:
1273
+ if ".json" in single_csv_stim_file.name:
1274
+ csv_upl_col2.json(st.session_state["stimdf_single_csv"])
1275
+ else:
1276
+ csv_upl_col2.dataframe(
1277
+ st.session_state["stimdf_single_csv"], use_container_width=True, hide_index=True, height=200
1278
+ )
1279
+
1280
+ if single_csv_file and single_csv_stim_file:
1281
+ with single_file_tab_csv_tab.expander("Column names for csv files", expanded=True):
1282
+ with st.form("Column names for csv files"):
1283
+ st.markdown("### Please set column/key names for csv/json files")
1284
+ st.markdown("#### Fixation file column names:")
1285
+ c1, c2, c3 = st.columns(3)
1286
+ x_col_name_fix = c1.text_input("x coordinate", key="x_col_name_fix", value="x")
1287
+ y_col_name_fix = c2.text_input("y coordinate", key="y_col_name_fix", value="y")
1288
+ subject_col_name_fix = c1.text_input("subject id", key="subject_col_name_fix", value="sub_id")
1289
+ trial_id_col_name_fix = c3.text_input("trial id", key="trial_id_col_name_fix", value="trial_id")
1290
+ time_start_col_name_fix = c2.text_input(
1291
+ "fixation start time", key="time_start_col_name_fix", value="corrected_start_time"
1292
+ )
1293
+ time_stop_col_name_fix = c3.text_input(
1294
+ "fixation end time", key="time_stop_col_name_fix", value="corrected_end_time"
1295
+ )
1296
+ st.markdown("#### Stimulus file column/key names:")
1297
+ c1, c2, c3 = st.columns(3)
1298
+ x_col_name_fix_stim = c1.text_input("x coordinate", key="x_col_name_fix_stim", value="char_x_center")
1299
+ y_col_name_fix_stim = c2.text_input("y coordinate", key="y_col_name_fix_stim", value="char_y_center")
1300
+ x_start_col_name_fix_stim = c3.text_input(
1301
+ "x min of interest areas", key="x_start_col_name_fix_stim", value="char_xmin"
1302
+ )
1303
+ x_end_col_name_fix_stim = c1.text_input(
1304
+ "x max of interest areas", key="x_end_col_name_fix_stim", value="char_xmax"
1305
+ )
1306
+ y_start_col_name_fix_stim = c2.text_input(
1307
+ "y min of interest areas", key="y_start_col_name_fix_stim", value="char_ymin"
1308
+ )
1309
+ y_end_col_name_fix_stim = c3.text_input(
1310
+ "x max of interest areas", key="y_end_col_name_fix_stim", value="char_ymax"
1311
+ )
1312
+ char_col_name_fix_stim = c1.text_input(
1313
+ "content of interest area", key="char_col_name_fix_stim", value="char"
1314
+ )
1315
+ line_num_col_name_stim = c3.text_input(
1316
+ "line number for interest areas", key="line_num_col_name_stim", value="assigned_line"
1317
+ )
1318
+ subject_col_name_stim = c1.text_input("subject id", key="subject_col_name_stim", value="sub_id")
1319
+ trial_id_col_name_stim = c2.text_input("trial id", key="trial_id_col_name_stim", value="trial_id")
1320
+ has_multiple_subject = c2.checkbox("multiple subject in file", key="has_multiple_subject")
1321
+ form_submitted = st.form_submit_button("Confirm column/key names")
1322
+
1323
+
1324
+ if single_csv_file and single_csv_stim_file:
1325
+ process_custom_csvs_button = single_file_tab_csv_tab.button(
1326
+ "Load data from files",
1327
+ )
1328
+ if process_custom_csvs_button or "trial_keys_single_csv" in st.session_state:
1329
+ trials_by_ids, trial_keys = get_fixations_file_trials_list(
1330
+ st.session_state["dffix_single_csv"], st.session_state["stimdf_single_csv"]
1331
+ )
1332
+
1333
+ st.session_state["trials_by_ids_single_csv"] = trials_by_ids
1334
+ st.session_state["trial_keys_single_csv"] = trial_keys
1335
+ with single_file_tab_csv_tab.form(key="trial_selection_algo_selection_form_single_csv"):
1336
+ col_a1, col_a2 = st.columns((1, 2))
1337
+ with col_a1:
1338
+ trial_choice = st.selectbox(
1339
+ "Which trial should be corrected?",
1340
+ st.session_state["trial_keys_single_csv"],
1341
+ key="trial_id_selected_custom_csv",
1342
+ index=0,
1343
+ )
1344
+ with col_a2:
1345
+ algo_choice_single_csv = st.multiselect(
1346
+ "Choose correction algorithm",
1347
+ ALGO_CHOICES,
1348
+ key="algo_choice_single_csv",
1349
+ default=[ALGO_CHOICES[0]],
1350
+ )
1351
+ process_trial_btn = st.form_submit_button("Correct trial")
1352
+ if "trial_id_selected_custom_csv" in st.session_state and "algo_choice_single_csv" in st.session_state:
1353
+ trial = st.session_state["trials_by_ids_single_csv"][trial_choice]
1354
+ dffix, trial, dpi, screen_res, font, font_size = process_trial_choice_single_csv(
1355
+ trial, algo_choice_single_csv
1356
+ )
1357
+ st.session_state["trial_single_csv"] = trial
1358
+ csv = convert_df(dffix)
1359
+
1360
+ single_file_tab_csv_tab.download_button(
1361
+ "Download corrected fixation data",
1362
+ csv,
1363
+ f'{trial["trial_id"]}.csv',
1364
+ "text/csv",
1365
+ key="download-csv-custom-csv",
1366
+ )
1367
+ with single_file_tab_csv_tab.expander("Show corrected fixation data", expanded=True):
1368
+ st.dataframe(dffix, use_container_width=True, hide_index=True, height=200)
1369
+ with single_file_tab_csv_tab.expander("Show fixation plots", expanded=True):
1370
+ plotting_checkboxes_single_single_csv = st.multiselect(
1371
+ "Select what gets plotted",
1372
+ ["Uncorrected Fixations", "Corrected Fixations", "Words", "Word boxes"],
1373
+ key="plotting_checkboxes_single_single_csv",
1374
+ default=["Uncorrected Fixations", "Corrected Fixations", "Words", "Word boxes"],
1375
+ )
1376
+
1377
+ st.plotly_chart(
1378
+ plotly_plot_with_image(
1379
+ dffix,
1380
+ trial,
1381
+ to_plot_list=plotting_checkboxes_single_single_csv,
1382
+ algo_choice=algo_choice_single_csv,
1383
+ ),
1384
+ use_container_width=True,
1385
+ )
1386
+ st.plotly_chart(plot_y_corr(dffix, algo_choice_single_csv), use_container_width=True)
1387
+
1388
+
1389
+ multi_file_tab.subheader("Upload multiple .asc files. Then select a correction algorithm and download the results.")
1390
+
1391
+ with multi_file_tab.form("Upload files to be processed and select algorithm"):
1392
+ multifile_col, multi_algo_col = st.columns((1, 1))
1393
+
1394
+ with multifile_col:
1395
+ st.file_uploader(
1396
+ "Upload .asc Files", accept_multiple_files=True, key="multi_asc_filelist", type=["asc", "tar", "zip"]
1397
+ )
1398
+ with multi_algo_col:
1399
+ st.multiselect(
1400
+ "Choose correction algorithms",
1401
+ ALGO_CHOICES,
1402
+ key="algo_choice_multi",
1403
+ default=[ALGO_CHOICES[0]],
1404
+ )
1405
+ process_trial_btn_multi = st.form_submit_button("Load and correct asc files")
1406
+ if process_trial_btn_multi:
1407
+ get_trials_and_lines_from_asc_files(st.session_state["multi_asc_filelist"])
1408
+
1409
+ if "zipfiles_with_results" in st.session_state:
1410
+ multi_res_col1, multi_res_col2 = multi_file_tab.columns(2)
1411
+
1412
+ chosen_zip = multi_res_col1.selectbox("Choose results to download", st.session_state["zipfiles_with_results"])
1413
+ st.session_state["logger"].info(f"Download button for {chosen_zip}")
1414
+ st.session_state["logger"].info(st.session_state["zipfiles_with_results"])
1415
+ zipnamestem = pl.Path(chosen_zip).stem
1416
+ with open(chosen_zip, "rb") as f:
1417
+ multi_res_col2.download_button(f"Download {zipnamestem}", f, file_name=f"results_{zipnamestem}.zip")
1418
+
1419
+
1420
+ if "trial_choices_multi" in st.session_state:
1421
+ multi_plotting_options_col1, multi_plotting_options_col2 = multi_file_tab.columns(2)
1422
+
1423
+ trial_choice_multi = multi_plotting_options_col1.selectbox(
1424
+ "Which trial should be plotted?",
1425
+ st.session_state["trial_choices_multi"],
1426
+ key="trial_id_multi",
1427
+ placeholder="Select trial to display and plot",
1428
+ on_change=process_trial_choice_and_update_df_multi,
1429
+ )
1430
+
1431
+ plotting_checkboxes_multi = multi_plotting_options_col2.multiselect(
1432
+ "Select what gets plotted",
1433
+ ["Uncorrected Fixations", "Corrected Fixations", "Words", "Word boxes"],
1434
+ default=["Uncorrected Fixations", "Corrected Fixations", "Words", "Word boxes"],
1435
+ )
1436
+
1437
+ if trial_choice_multi and "dffix_multi" in st.session_state:
1438
+ df_expander_multi = multi_file_tab.expander("Show Dataframe", False)
1439
+ plot_expander_multi = multi_file_tab.expander("Show Plots", True)
1440
+
1441
+ df_expander_multi.dataframe(st.session_state["dffix_multi"])
1442
+ dffix_multi = st.session_state["dffix_multi"]
1443
+ trial_multi = st.session_state["trial_multi"]
1444
+
1445
+ plot_expander_multi.plotly_chart(
1446
+ plotly_plot_with_image(
1447
+ dffix_multi, trial_multi, st.session_state["algo_choice_multi"], to_plot_list=plotting_checkboxes_multi
1448
+ ),
1449
+ use_container_width=True,
1450
+ )
1451
+ plot_expander_multi.plotly_chart(
1452
+ plot_y_corr(dffix_multi, st.session_state["algo_choice_multi"]), use_container_width=True
1453
+ )
classic_correction_algos.py ADDED
@@ -0,0 +1,546 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mostly adapted from https://github.com/jwcarr/eyekit/blob/350d055eecaa1581b03db5a847424825ffbb10f6/eyekit/_snap.py
3
+ """
4
+
5
+ import numpy as np
6
+ from sklearn.cluster import KMeans
7
+
8
+
9
+ def apply_classic_algo(
10
+ dffix,
11
+ trial,
12
+ algo="slice",
13
+ algo_params=dict(x_thresh=192, y_thresh=32, w_thresh=32, n_thresh=90),
14
+ ):
15
+ fixation_array = dffix.loc[:, ["x", "y"]].values
16
+ y_diff = trial["y_diff"]
17
+ if "y_char_unique" in trial:
18
+ midlines = trial["y_char_unique"]
19
+ else:
20
+ midlines = trial["y_midline"]
21
+ if len(midlines) == 1:
22
+ corrected_fix_y_vals = np.ones((fixation_array.shape[0])) * midlines[0]
23
+ elif fixation_array.shape[0] <= 2:
24
+ corrected_fix_y_vals = np.ones((fixation_array.shape[0])) * midlines[0]
25
+
26
+ else:
27
+ if algo == "slice":
28
+ corrected_fix_y_vals = slice(fixation_array, midlines, line_height=y_diff, **algo_params)
29
+ elif algo == "warp":
30
+ word_center_list = [(word["word_x_center"], word["word_y_center"]) for word in trial["words_list"]]
31
+ corrected_fix_y_vals = warp(fixation_array, word_center_list)
32
+ elif algo == "chain":
33
+ corrected_fix_y_vals = chain(fixation_array, midlines, **algo_params)
34
+ elif algo == "cluster":
35
+ corrected_fix_y_vals = cluster(fixation_array, midlines)
36
+ elif algo == "merge":
37
+ corrected_fix_y_vals = merge(fixation_array, midlines, **algo_params)
38
+ elif algo == "regress":
39
+ corrected_fix_y_vals = regress(fixation_array, midlines, **algo_params)
40
+ elif algo == "segment":
41
+ corrected_fix_y_vals = segment(fixation_array, midlines, **algo_params)
42
+ elif algo == "split":
43
+ corrected_fix_y_vals = split(fixation_array, midlines, **algo_params)
44
+ elif algo == "stretch":
45
+ corrected_fix_y_vals = stretch(fixation_array, midlines, **algo_params)
46
+ elif algo == "attach":
47
+ corrected_fix_y_vals = attach(fixation_array, midlines)
48
+ elif algo == "compare":
49
+ word_center_list = [(word["word_x_center"], word["word_y_center"]) for word in trial["words_list"]]
50
+ n_nearest_lines = min(algo_params["n_nearest_lines"], len(midlines) - 1)
51
+ algo_params["n_nearest_lines"] = n_nearest_lines
52
+ corrected_fix_y_vals = compare(fixation_array, np.array(word_center_list), **algo_params)
53
+ else:
54
+ raise NotImplementedError(f"{algo} not implemented")
55
+
56
+ corrected_line_nums = [trial["y_char_unique"].index(y) for y in corrected_fix_y_vals]
57
+ dffix[f"y_{algo}"] = corrected_fix_y_vals
58
+ dffix[f"line_num_{algo}"] = corrected_line_nums
59
+ return dffix
60
+
61
+
62
def slice(fixation_XY, midlines, line_height: float, x_thresh=192, y_thresh=32, w_thresh=32, n_thresh=90):
    """
    Adapted from Eyekit(https://github.com/jwcarr/eyekit/blob/350d055eecaa1581b03db5a847424825ffbb10f6/eyekit/_snap.py)
    implementation

    Form a set of runs and then reduce the set to *m* by repeatedly merging
    those that appear to be on the same line. Merged sequences are then
    assigned to text lines in positional order. Default params:
    `x_thresh=192`, `y_thresh=32`, `w_thresh=32`, `n_thresh=90`. Requires
    NumPy. Original method by [Glandorf & Schroeder (2021)](https://doi.org/10.1016/j.procs.2021.09.069).

    ``fixation_XY`` is an (n, 2) array-like of fixation coordinates,
    ``midlines`` the candidate line y positions, and ``line_height`` the
    vertical distance between adjacent text lines (used to place "phantom"
    proto lines when no run merges into an adjacent slot). Returns the
    corrected y column, one snapped value per fixation.
    """
    fixation_XY = np.array(fixation_XY, dtype=float)
    line_Y = np.array(midlines, dtype=float)
    proto_lines, phantom_proto_lines = {}, {}
    # 1. Segment runs: a run ends wherever consecutive fixations jump more
    # than x_thresh horizontally or y_thresh vertically.
    dist_X = abs(np.diff(fixation_XY[:, 0]))
    dist_Y = abs(np.diff(fixation_XY[:, 1]))
    end_run_indices = list(np.where(np.logical_or(dist_X > x_thresh, dist_Y > y_thresh))[0] + 1)
    run_starts = [0] + end_run_indices
    run_ends = end_run_indices + [len(fixation_XY)]
    runs = [list(range(start, end)) for start, end in zip(run_starts, run_ends)]
    # 2. Determine starting run: the one with the greatest horizontal extent.
    longest_run_i = np.argmax([fixation_XY[run[-1], 0] - fixation_XY[run[0], 0] for run in runs])
    proto_lines[0] = runs.pop(longest_run_i)
    # 3. Group runs into proto lines, growing outward from the starting run.
    while runs:
        merger_on_this_iteration = False
        for proto_line_i, direction in [(min(proto_lines), -1), (max(proto_lines), 1)]:
            # Create new proto line above or below (depending on direction)
            proto_lines[proto_line_i + direction] = []
            # Get current proto line XY coordinates (if proto line is empty, get phantom coordinates)
            if proto_lines[proto_line_i]:
                proto_line_XY = fixation_XY[proto_lines[proto_line_i]]
            else:
                proto_line_XY = phantom_proto_lines[proto_line_i]
            # Compute differences between current proto line and all runs
            # (mean vertical offset of each run's fixations from the closest
            # proto-line fixation at a similar x position).
            run_differences = np.zeros(len(runs))
            for run_i, run in enumerate(runs):
                y_diffs = [y - proto_line_XY[np.argmin(abs(proto_line_XY[:, 0] - x)), 1] for x, y in fixation_XY[run]]
                run_differences[run_i] = np.mean(y_diffs)
            # Find runs that can be merged into this proto line
            merge_into_current = list(np.where(abs(run_differences) < w_thresh)[0])
            # Find runs that can be merged into the adjacent proto line
            merge_into_adjacent = list(
                np.where(
                    np.logical_and(
                        run_differences * direction >= w_thresh,
                        run_differences * direction < n_thresh,
                    )
                )[0]
            )
            # Perform mergers
            for index in merge_into_current:
                proto_lines[proto_line_i].extend(runs[index])
            for index in merge_into_adjacent:
                proto_lines[proto_line_i + direction].extend(runs[index])
            # If no mergers to the adjacent, create phantom line for the adjacent
            # (a single synthetic point one line_height away).
            if not merge_into_adjacent:
                average_x, average_y = np.mean(proto_line_XY, axis=0)
                adjacent_y = average_y + line_height * direction
                phantom_proto_lines[proto_line_i + direction] = np.array([[average_x, adjacent_y]])
            # Remove all runs that were merged on this iteration
            # (reverse order so earlier indices stay valid while deleting).
            for index in sorted(merge_into_current + merge_into_adjacent, reverse=True):
                del runs[index]
                merger_on_this_iteration = True
        # If no mergers were made, break the while loop
        if not merger_on_this_iteration:
            break
    # 4. Assign any leftover runs to the closest proto lines
    for run in runs:
        best_pl_distance = np.inf
        best_pl_assignemnt = None
        for proto_line_i in proto_lines:
            if proto_lines[proto_line_i]:
                proto_line_XY = fixation_XY[proto_lines[proto_line_i]]
            else:
                proto_line_XY = phantom_proto_lines[proto_line_i]
            y_diffs = [y - proto_line_XY[np.argmin(abs(proto_line_XY[:, 0] - x)), 1] for x, y in fixation_XY[run]]
            pl_distance = abs(np.mean(y_diffs))
            if pl_distance < best_pl_distance:
                best_pl_distance = pl_distance
                best_pl_assignemnt = proto_line_i
        proto_lines[best_pl_assignemnt].extend(run)
    # 5. Prune proto lines until there are as many as there are text lines,
    # folding the smaller of the outermost proto lines into its neighbor.
    while len(proto_lines) > len(line_Y):
        top, bot = min(proto_lines), max(proto_lines)
        if len(proto_lines[top]) < len(proto_lines[bot]):
            proto_lines[top + 1].extend(proto_lines[top])
            del proto_lines[top]
        else:
            proto_lines[bot - 1].extend(proto_lines[bot])
            del proto_lines[bot]
    # 6. Map proto lines to text lines (both in top-to-bottom order).
    for line_i, proto_line_i in enumerate(sorted(proto_lines)):
        fixation_XY[proto_lines[proto_line_i], 1] = line_Y[line_i]
    return fixation_XY[:, 1]
158
+
159
+
160
def attach(fixation_XY, line_Y):
    """Snap every fixation to its nearest text line.

    ``fixation_XY`` (an (n, 2) array) is modified in place; the corrected
    y column is returned.
    """
    for fixation in fixation_XY:
        nearest_line = np.argmin(abs(line_Y - fixation[1]))
        fixation[1] = line_Y[nearest_line]
    return fixation_XY[:, 1]
166
+
167
+
168
def chain(fixation_XY, midlines, x_thresh=192, y_thresh=32):
    """
    Adapted from Eyekit(https://github.com/jwcarr/eyekit/blob/350d055eecaa1581b03db5a847424825ffbb10f6/eyekit/_snap.py)
    implementation

    Chain consecutive fixations that are sufficiently close to each other and
    snap each chain to the text line closest to its mean y value. Default
    params: `x_thresh=192`, `y_thresh=32`. Original method implemented in
    [popEye](https://github.com/sascha2schroeder/popEye/).
    """
    fixation_XY = np.array(fixation_XY)
    line_Y = np.array(midlines)
    # A chain breaks wherever consecutive fixations jump too far in x or y.
    jump_X = abs(np.diff(fixation_XY[:, 0]))
    jump_Y = abs(np.diff(fixation_XY[:, 1]))
    chain_breaks = list(np.where((jump_X > x_thresh) | (jump_Y > y_thresh))[0] + 1)
    chain_starts = [0] + chain_breaks
    chain_ends = chain_breaks + [len(fixation_XY)]
    for start, end in zip(chain_starts, chain_ends):
        chain_mean_y = fixation_XY[start:end, 1].mean()
        nearest_line = np.argmin(abs(line_Y - chain_mean_y))
        fixation_XY[start:end, 1] = line_Y[nearest_line]
    return fixation_XY[:, 1]
195
+
196
+
197
def cluster(fixation_XY, line_Y):
    """Assign fixations to lines by k-means clustering their y values.

    Clusters the fixation y values into as many groups as there are text
    lines, orders the clusters vertically, and maps each ordered cluster to
    the corresponding line. ``fixation_XY`` is modified in place; the
    corrected y column is returned. Requires scikit-learn.
    """
    n_lines = len(line_Y)
    fixation_Y = fixation_XY[:, 1].reshape(-1, 1)
    labels = KMeans(n_lines, n_init=100, max_iter=300).fit_predict(fixation_Y)
    cluster_means = [fixation_Y[labels == k].mean() for k in range(n_lines)]
    clusters_by_height = np.argsort(cluster_means)
    for fixation_i, label in enumerate(labels):
        line_i = np.where(clusters_by_height == label)[0][0]
        fixation_XY[fixation_i, 1] = line_Y[line_i]
    return fixation_XY[:, 1]
207
+
208
+
209
def compare(fixation_XY, word_XY, x_thresh=512, n_nearest_lines=3):
    # COMPARE
    #
    # Lima Sanches, C., Kise, K., & Augereau, O. (2015). Eye gaze and text
    # line matching for reading analysis. In Adjunct proceedings of the
    # 2015 ACM International Joint Conference on Pervasive and
    # Ubiquitous Computing and proceedings of the 2015 ACM International
    # Symposium on Wearable Computers (pp. 1227–1233). Association for
    # Computing Machinery.
    #
    # https://doi.org/10.1145/2800835.2807936
    #
    # Splits the fixation sequence into gaze lines at large leftward x jumps
    # (< -x_thresh), then assigns each gaze line to whichever of its
    # n_nearest_lines closest text lines gives the lowest DTW alignment cost
    # between the gaze-line x values and that text line's word x values.
    # NOTE(review): assumes n_nearest_lines <= number of distinct lines in
    # word_XY — the caller (apply_classic_algo) clamps it; verify if called
    # directly. Returns the corrected y column of fixation_XY.
    line_Y = np.unique(word_XY[:, 1])
    n = len(fixation_XY)
    diff_X = np.diff(fixation_XY[:, 0])
    end_line_indices = list(np.where(diff_X < -x_thresh)[0] + 1)
    end_line_indices.append(n)
    start_of_line = 0
    for end_of_line in end_line_indices:
        gaze_line = fixation_XY[start_of_line:end_of_line]
        mean_y = np.mean(gaze_line[:, 1])
        lines_ordered_by_proximity = np.argsort(abs(line_Y - mean_y))
        nearest_line_I = lines_ordered_by_proximity[:n_nearest_lines]
        line_costs = np.zeros(n_nearest_lines)
        for candidate_i in range(n_nearest_lines):
            candidate_line_i = nearest_line_I[candidate_i]
            text_line = word_XY[word_XY[:, 1] == line_Y[candidate_line_i]]
            # Compare x profiles only (column 0) via dynamic time warping.
            dtw_cost, dtw_path = dynamic_time_warping(gaze_line[:, 0:1], text_line[:, 0:1])
            line_costs[candidate_i] = dtw_cost
        line_i = nearest_line_I[np.argmin(line_costs)]
        fixation_XY[start_of_line:end_of_line, 1] = line_Y[line_i]
        start_of_line = end_of_line
    return fixation_XY[:, 1]
241
+
242
+
243
def merge(fixation_XY, midlines, text_right_to_left=False, y_thresh=32, gradient_thresh=0.1, error_thresh=20):
    """
    Form a set of progressive sequences and then reduce the set to *m* by
    repeatedly merging those that appear to be on the same line. Merged
    sequences are then assigned to text lines in positional order. Default
    params: `y_thresh=32`, `gradient_thresh=0.1`, `error_thresh=20`. Requires
    NumPy. Original method by [Špakov et al. (2019)](https://doi.org/10.3758/s13428-018-1120-x).

    Merging runs in four phases with progressively looser constraints on
    minimum sequence length and fit quality, stopping once the number of
    sequences equals the number of text lines. Returns the corrected y
    column of ``fixation_XY``.
    """
    try:
        import numpy as np
    except ModuleNotFoundError as e:
        e.msg = "The merge method requires NumPy."
        raise
    fixation_XY = np.array(fixation_XY)
    line_Y = np.array(midlines)
    diff_X = np.diff(fixation_XY[:, 0])
    dist_Y = abs(np.diff(fixation_XY[:, 1]))
    # A "progressive sequence" ends wherever reading direction reverses or
    # the vertical jump exceeds y_thresh.
    if text_right_to_left:
        sequence_boundaries = list(np.where(np.logical_or(diff_X > 0, dist_Y > y_thresh))[0] + 1)
    else:
        sequence_boundaries = list(np.where(np.logical_or(diff_X < 0, dist_Y > y_thresh))[0] + 1)
    sequence_starts = [0] + sequence_boundaries
    sequence_ends = sequence_boundaries + [len(fixation_XY)]
    sequences = [list(range(start, end)) for start, end in zip(sequence_starts, sequence_ends)]
    for min_i, min_j, remove_constraints in [
        (3, 3, False),  # Phase 1
        (1, 3, False),  # Phase 2
        (1, 1, False),  # Phase 3
        (1, 1, True),  # Phase 4
    ]:
        while len(sequences) > len(line_Y):
            best_merger = None
            best_error = np.inf
            # Try every pair of sequences; the merge candidate is scored by
            # the RMS error of a straight-line fit through the combined
            # fixations (a good merge lies on one nearly-horizontal line).
            for i in range(len(sequences) - 1):
                if len(sequences[i]) < min_i:
                    continue  # first sequence too short, skip to next i
                for j in range(i + 1, len(sequences)):
                    if len(sequences[j]) < min_j:
                        continue  # second sequence too short, skip to next j
                    candidate_XY = fixation_XY[sequences[i] + sequences[j]]
                    gradient, intercept = np.polyfit(candidate_XY[:, 0], candidate_XY[:, 1], 1)
                    residuals = candidate_XY[:, 1] - (gradient * candidate_XY[:, 0] + intercept)
                    error = np.sqrt(sum(residuals**2) / len(candidate_XY))
                    if remove_constraints or (abs(gradient) < gradient_thresh and error < error_thresh):
                        if error < best_error:
                            best_merger = (i, j)
                            best_error = error
            if best_merger is None:
                break  # no possible mergers, break while and move to next phase
            merge_i, merge_j = best_merger
            merged_sequence = sequences[merge_i] + sequences[merge_j]
            sequences.append(merged_sequence)
            # Delete j before i (j > i) so the first index stays valid.
            del sequences[merge_j], sequences[merge_i]
    # Assign the surviving sequences to text lines in vertical order.
    mean_Y = [fixation_XY[sequence, 1].mean() for sequence in sequences]
    ordered_sequence_indices = np.argsort(mean_Y)
    for line_i, sequence_i in enumerate(ordered_sequence_indices):
        fixation_XY[sequences[sequence_i], 1] = line_Y[line_i]
    return fixation_XY[:, 1]
301
+
302
+
303
def regress(
    fixation_XY,
    midlines,
    slope_bounds=(-0.1, 0.1),
    offset_bounds=(-50, 50),
    std_bounds=(1, 20),
):
    """
    Find *m* regression lines that best fit the fixations and group fixations
    according to best fit regression lines, and then assign groups to text
    lines in positional order. Default params: `slope_bounds=(-0.1, 0.1)`,
    `offset_bounds=(-50, 50)`, `std_bounds=(1, 20)`. Requires SciPy.
    Original method by [Cohen (2013)](https://doi.org/10.3758/s13428-012-0280-3).

    Returns one snapped y value per fixation: each fixation is assigned to
    the line whose fitted regression line explains it best.
    """
    try:
        import numpy as np
        from scipy.optimize import minimize
        from scipy.stats import norm
    except ModuleNotFoundError as e:
        e.msg = "The regress method requires SciPy."
        raise
    fixation_XY = np.array(fixation_XY)
    line_Y = np.array(midlines)
    # density[f, l] = log-likelihood of fixation f under line l's fit;
    # filled as a side effect of fit_lines so the final call below leaves
    # the best-fit densities in place for the argmax.
    density = np.zeros((len(fixation_XY), len(line_Y)))

    def fit_lines(params):
        # The three unconstrained optimizer params are squashed through the
        # normal CDF into (slope, offset, std) within their bounds.
        k = slope_bounds[0] + (slope_bounds[1] - slope_bounds[0]) * norm.cdf(params[0])
        o = offset_bounds[0] + (offset_bounds[1] - offset_bounds[0]) * norm.cdf(params[1])
        s = std_bounds[0] + (std_bounds[1] - std_bounds[0]) * norm.cdf(params[2])
        predicted_Y_from_slope = fixation_XY[:, 0] * k
        line_Y_plus_offset = line_Y + o
        for line_i in range(len(line_Y)):
            fit_Y = predicted_Y_from_slope + line_Y_plus_offset[line_i]
            density[:, line_i] = norm.logpdf(fixation_XY[:, 1], fit_Y, s)
        # Negative total log-likelihood under each fixation's best line.
        return -sum(density.max(axis=1))

    best_fit = minimize(fit_lines, [0, 0, 0], method="powell")
    # Re-evaluate at the optimum so density reflects the best-fit params.
    fit_lines(best_fit.x)
    return line_Y[density.argmax(axis=1)]
342
+
343
+
344
def segment(fixation_XY, midlines, text_right_to_left=False):
    """
    Segment fixation sequence into *m* subsequences based on *m*–1 most-likely
    return sweeps, and then assign subsequences to text lines in chronological
    order. Requires NumPy. Original method by
    [Abdulin & Komogortsev (2015)](https://doi.org/10.1109/BTAS.2015.7358786).
    """
    fixation_XY = np.array(fixation_XY)
    line_Y = np.array(midlines)
    x_steps = np.diff(fixation_XY[:, 0])
    steps_by_size = np.argsort(x_steps)
    # The m-1 most extreme backward jumps (forward for RTL text) are taken
    # to be the return sweeps between lines.
    if text_right_to_left:
        sweep_indices = steps_by_size[-(len(line_Y) - 1) :]
    else:
        sweep_indices = steps_by_size[: len(line_Y) - 1]
    line_i = 0
    for fixation_i in range(len(fixation_XY)):
        fixation_XY[fixation_i, 1] = line_Y[line_i]
        if fixation_i in sweep_indices:
            line_i += 1
    return fixation_XY[:, 1]
370
+
371
+
372
def split(fixation_XY, midlines, text_right_to_left=False):
    """
    Split fixation sequence into subsequences based on best candidate return
    sweeps, and then assign subsequences to closest text lines. Requires
    SciPy. Original method by [Carr et al. (2022)](https://doi.org/10.3758/s13428-021-01554-0).

    The x displacements between consecutive fixations are k-means clustered
    into two groups (within-line saccades vs. return sweeps); each resulting
    gaze line is snapped to the text line nearest its mean y value. Returns
    the corrected y column of ``fixation_XY``.
    """
    try:
        import numpy as np
        from scipy.cluster.vq import kmeans2
    except ModuleNotFoundError as e:
        e.msg = "The split method requires SciPy."
        raise
    fixation_XY = np.array(fixation_XY)
    line_Y = np.array(midlines)
    diff_X = np.array(np.diff(fixation_XY[:, 0]), dtype=float).reshape(-1, 1)
    centers, clusters = kmeans2(diff_X, 2, iter=100, minit="++", missing="raise")
    # Return sweeps are the large negative (LTR) or positive (RTL) x jumps.
    if text_right_to_left:
        sweep_marker = np.argmax(centers)
    else:
        sweep_marker = np.argmin(centers)
    end_line_indices = list(np.where(clusters == sweep_marker)[0] + 1)
    end_line_indices.append(len(fixation_XY))
    start_of_line = 0
    for end_of_line in end_line_indices:
        mean_y = np.mean(fixation_XY[start_of_line:end_of_line, 1])
        line_i = np.argmin(abs(line_Y - mean_y))
        # Bug fix: assign only the y column — the original wrote line_Y[line_i]
        # into BOTH columns, clobbering the x coordinates as well.
        fixation_XY[start_of_line:end_of_line, 1] = line_Y[line_i]
        start_of_line = end_of_line
    return fixation_XY[:, 1]
401
+
402
+
403
def stretch(fixation_XY, midlines, stretch_bounds=(0.9, 1.1), offset_bounds=(-50, 50)):
    """
    Find a stretch factor and offset that results in a good alignment between
    the fixations and lines of text, and then assign the transformed fixations
    to the closest text lines. Default params: `stretch_bounds=(0.9, 1.1)`,
    `offset_bounds=(-50, 50)`. Requires SciPy.
    Original method by [Lohmeier (2015)](http://www.monochromata.de/master_thesis/ma1.3.pdf).
    """
    try:
        import numpy as np
        from scipy.optimize import minimize
    except ModuleNotFoundError as e:
        e.msg = "The stretch method requires SciPy."
        raise
    fixation_Y = np.array(fixation_XY)[:, 1]
    line_Y = np.array(midlines)
    corrected_Y = np.zeros(len(fixation_Y))

    def alignment_error(params):
        # Transform y values, snap each to its closest line, and score by
        # total snapping distance. corrected_Y is filled as a side effect.
        scale, offset = params
        transformed_Y = fixation_Y * scale + offset
        for i, y in enumerate(transformed_Y):
            corrected_Y[i] = line_Y[np.argmin(abs(line_Y - y))]
        return np.sum(np.abs(transformed_Y - corrected_Y))

    best_fit = minimize(alignment_error, [1, 0], method="powell", bounds=[stretch_bounds, offset_bounds])
    # Re-evaluate at the optimum so corrected_Y holds the best alignment.
    alignment_error(best_fit.x)
    return corrected_Y
432
+
433
+
434
def warp(fixation_XY, word_center_list):
    """
    Map fixations to word centers using [Dynamic Time
    Warping](https://en.wikipedia.org/wiki/Dynamic_time_warping). This finds a
    monotonically increasing mapping between fixations and words with the
    shortest overall distance, effectively resulting in *m* subsequences.
    Fixations are then assigned to the lines that their mapped words belong
    to, effectively assigning subsequences to text lines in chronological
    order. Requires NumPy.
    Original method by [Carr et al. (2022)](https://doi.org/10.3758/s13428-021-01554-0).

    ``fixation_XY`` is an (n, 2) array-like of fixations; ``word_center_list``
    a sequence of (x, y) word centers. Returns the corrected y column.
    """
    try:
        import numpy as np
    except ModuleNotFoundError as e:
        e.msg = "The warp method requires NumPy."
        raise
    fixation_XY = np.array(fixation_XY)
    word_XY = np.array([word_center for word_center in word_center_list])
    n1 = len(fixation_XY)
    n2 = len(word_XY)
    # Standard DTW cost matrix with an extra padded row/column of inf.
    cost = np.zeros((n1 + 1, n2 + 1))
    cost[0, :] = np.inf
    cost[:, 0] = np.inf
    cost[0, 0] = 0
    for fixation_i in range(n1):
        for word_i in range(n2):
            distance = np.sqrt(sum((fixation_XY[fixation_i] - word_XY[word_i]) ** 2))
            cost[fixation_i + 1, word_i + 1] = distance + min(
                cost[fixation_i, word_i + 1],
                cost[fixation_i + 1, word_i],
                cost[fixation_i, word_i],
            )
    cost = cost[1:, 1:]
    warping_path = [[] for _ in range(n1)]
    # Trace the optimal path back from the bottom-right corner. (Bug fix:
    # the original relied on the leaked for-loop variables still holding
    # n1-1 / n2-1 here; make the starting indices explicit.)
    fixation_i, word_i = n1 - 1, n2 - 1
    while fixation_i > 0 or word_i > 0:
        warping_path[fixation_i].append(word_i)
        possible_moves = [np.inf, np.inf, np.inf]
        if fixation_i > 0 and word_i > 0:
            possible_moves[0] = cost[fixation_i - 1, word_i - 1]
        if fixation_i > 0:
            possible_moves[1] = cost[fixation_i - 1, word_i]
        if word_i > 0:
            possible_moves[2] = cost[fixation_i, word_i - 1]
        best_move = np.argmin(possible_moves)
        if best_move == 0:
            fixation_i -= 1
            word_i -= 1
        elif best_move == 1:
            fixation_i -= 1
        else:
            word_i -= 1
    warping_path[0].append(0)
    # Each fixation takes the most common y among its mapped words
    # (ties resolved by Python's max over the set, as in the original).
    for fixation_i, words_mapped_to_fixation_i in enumerate(warping_path):
        candidate_Y = list(word_XY[words_mapped_to_fixation_i, 1])
        fixation_XY[fixation_i, 1] = max(set(candidate_Y), key=candidate_Y.count)
    return fixation_XY[:, 1]
490
+
491
+
492
def dynamic_time_warping(sequence1, sequence2):
    """Compute the DTW alignment between two sequences of vectors.

    Parameters
    ----------
    sequence1, sequence2 : array-like of shape (n, d)
        Sequences of d-dimensional points (rows must support numpy
        arithmetic).

    Returns
    -------
    tuple
        ``(cost, path)`` where ``cost`` is the total alignment cost and
        ``path[i]`` lists the indices of ``sequence2`` aligned to element
        ``i`` of ``sequence1``.
    """
    n1 = len(sequence1)
    n2 = len(sequence2)
    # Cost matrix padded with an inf row/column so the recurrence can read
    # "out of bounds" neighbors uniformly.
    dtw_cost = np.zeros((n1 + 1, n2 + 1))
    dtw_cost[0, :] = np.inf
    dtw_cost[:, 0] = np.inf
    dtw_cost[0, 0] = 0
    for i in range(n1):
        for j in range(n2):
            this_cost = np.sqrt(sum((sequence1[i] - sequence2[j]) ** 2))
            dtw_cost[i + 1, j + 1] = this_cost + min(dtw_cost[i, j + 1], dtw_cost[i + 1, j], dtw_cost[i, j])
    dtw_cost = dtw_cost[1:, 1:]
    dtw_path = [[] for _ in range(n1)]
    # Trace back from the bottom-right corner. (Bug fix: the original relied
    # on the leaked for-loop variables i and j still holding n1-1 / n2-1.)
    i, j = n1 - 1, n2 - 1
    while i > 0 or j > 0:
        dtw_path[i].append(j)
        possible_moves = [np.inf, np.inf, np.inf]
        if i > 0 and j > 0:
            possible_moves[0] = dtw_cost[i - 1, j - 1]
        if i > 0:
            possible_moves[1] = dtw_cost[i - 1, j]
        if j > 0:
            possible_moves[2] = dtw_cost[i, j - 1]
        best_move = np.argmin(possible_moves)
        if best_move == 0:
            i -= 1
            j -= 1
        elif best_move == 1:
            i -= 1
        else:
            j -= 1
    dtw_path[0].append(0)
    return dtw_cost[-1, -1], dtw_path
524
+
525
+
526
def wisdom_of_the_crowd(assignments):
    """
    For each fixation, choose the y-value with the most votes across multiple
    algorithms. In the event of a tie, the left-most algorithm is given
    priority.
    """
    stacked = np.column_stack(assignments)
    correction = []
    for votes in stacked:
        vote_list = list(votes)
        tallies = {y: vote_list.count(y) for y in set(vote_list)}
        top_count = max(tallies.values())
        winners = {y for y, count in tallies.items() if count == top_count}
        if len(winners) == 1:
            correction.append(next(iter(winners)))
        else:
            # Tie: the first (left-most) algorithm's vote among the winners.
            correction.append(next(y for y in votes if y in winners))
    return correction
eyekit_measures.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import eyekit as ek
3
+ import numpy as np
4
+ import pandas as pd
5
+ from PIL import Image
6
+
7
+
8
# Template for collecting per-interest-area reading measures. Each key names
# a function in `eyekit.measure`; the lists are filled with one value per
# word (or character) in get_eyekit_measures, which deep-copies this dict.
MEASURES_DICT = {
    "number_of_fixations": [],
    "initial_fixation_duration": [],
    "first_of_many_duration": [],
    "total_fixation_duration": [],
    "gaze_duration": [],
    "go_past_duration": [],
    "second_pass_duration": [],
    "initial_landing_position": [],
    "initial_landing_distance": [],
    "landing_distances": [],
    "number_of_regressions_in": [],
}
21
+
22
+
23
def get_fix_seq_and_text_block(
    dffix,
    trial,
    x_txt_start=None,
    y_txt_start=None,
    font_face="Courier New",
    font_size=None,
    line_height=None,
    use_corrected_fixations=True,
    correction_algo="warp",
):
    """Build an eyekit FixationSequence and TextBlock for one trial.

    Parameters
    ----------
    dffix : pandas.DataFrame with columns "x", "y", "corrected_start_time",
        "corrected_end_time" and, when a correction was applied,
        f"y_{correction_algo}".
    trial : dict-like with "chars_list", "line_heights", "trial_id",
        "filename" and optionally "display_coords", "font", "font_size".
    x_txt_start, y_txt_start : optional text-origin overrides; defaults come
        from the first character's bounding box.
    font_face, font_size, line_height : rendering parameters; missing values
        are filled from the trial or derived from the minimum line height.
    use_corrected_fixations : if True, use the algorithm-corrected y values.
    correction_algo : suffix of the corrected-y column, e.g. "warp".

    Returns
    -------
    (fixation_sequence, textblock, screen_size) on success, or the unchanged
    ``dffix`` if eyekit rejects the fixation tuples.

    Side effects: saves the fixation sequence and text block as JSON files
    under results/.
    """
    # Fixations whose corrected end time is not after the start time get a
    # 1 ms duration (and the uncorrected y) so eyekit accepts them.
    if use_corrected_fixations and correction_algo is not None:
        fixations_tuples = [
            (
                (x[1]["x"], x[1][f"y_{correction_algo}"], x[1]["corrected_start_time"], x[1]["corrected_end_time"])
                if x[1]["corrected_start_time"] < x[1]["corrected_end_time"]
                else (x[1]["x"], x[1]["y"], x[1]["corrected_start_time"], x[1]["corrected_end_time"] + 1)
            )
            for x in dffix.iterrows()
        ]
    else:
        fixations_tuples = [
            (
                (x[1]["x"], x[1]["y"], x[1]["corrected_start_time"], x[1]["corrected_end_time"])
                if x[1]["corrected_start_time"] < x[1]["corrected_end_time"]
                else (x[1]["x"], x[1]["y"], x[1]["corrected_start_time"], x[1]["corrected_end_time"] + 1)
            )
            for x in dffix.iterrows()
        ]

    try:
        fixation_sequence = ek.FixationSequence(fixations_tuples)
    except Exception as e:
        print(e)
        print(f"Creating fixation failed for {trial['trial_id']} {trial['filename']}")
        return dffix

    if "display_coords" in trial:
        display_coords = trial["display_coords"]
    else:
        display_coords = (0, 0, 1920, 1080)
    screen_size = ((display_coords[2] - display_coords[0]), (display_coords[3] - display_coords[1]))

    # Use the smallest observed line height as the canonical line spacing.
    y_diffs = np.unique(trial["line_heights"])
    if len(y_diffs) == 1:
        y_diff = y_diffs[0]
    else:
        y_diff = np.min(y_diffs)
    chars_list = trial["chars_list"]
    max_line = int(chars_list[-1]["assigned_line"])
    words_on_lines = {x: [] for x in range(int(max_line) + 1)}
    # Group characters by their assigned line. (Previously a side-effect
    # list comprehension; a plain loop states the intent.)
    for char_entry in chars_list:
        words_on_lines[char_entry["assigned_line"]].append(char_entry["char"])
    sentence_list = ["".join(chars) for chars in words_on_lines.values()]

    if x_txt_start is None:
        x_txt_start = float(chars_list[0]["char_xmin"])
    if y_txt_start is None:
        y_txt_start = float(chars_list[0]["char_ymax"])

    # NOTE: these fallbacks only trigger when the caller explicitly passes
    # font_face=None (the parameter default is "Courier New").
    if font_face is None and "font" in trial:
        font_face = trial["font"]
    elif font_face is None:
        font_face = "DejaVu Sans Mono"

    if font_size is None and "font_size" in trial:
        font_size = trial["font_size"]
    elif font_size is None:
        font_size = float(y_diff * 0.333)  # pixel to point conversion
    if line_height is None:
        line_height = float(y_diff)
    textblock = ek.TextBlock(
        sentence_list,
        position=(float(x_txt_start), float(y_txt_start)),
        font_face=font_face,
        line_height=line_height,
        font_size=font_size,
        anchor="left",
        align="left",
    )

    # eyekit_plot(textblock, fixation_sequence, screen_size)
    ek.io.save(fixation_sequence, f'results/fixation_sequence_eyekit_{trial["trial_id"]}.json', compress=False)
    ek.io.save(textblock, f'results/textblock_eyekit_{trial["trial_id"]}.json', compress=False)

    return fixation_sequence, textblock, screen_size
108
+
109
+
110
def eyekit_plot(textblock, fixation_sequence, screen_size):
    """Render the text block, word bounding boxes (hotpink) and fixation
    sequence to a temporary PNG and return it as a PIL image."""
    canvas = ek.vis.Image(*screen_size)
    canvas.draw_text_block(textblock)
    for word_box in textblock.words():
        canvas.draw_rectangle(word_box, color="hotpink")
    canvas.draw_fixation_sequence(fixation_sequence)
    canvas.save("temp_eyekit_img.png", crop_margin=200)
    return Image.open("temp_eyekit_img.png")
119
+
120
+
121
def plot_with_measure(textblock, fixation_sequence, screen_size, measure, use_characters=False):
    """Render the text block annotated with a per-word (or per-character)
    eyekit reading measure and return the result as a PIL image.

    Parameters
    ----------
    textblock : eyekit TextBlock.
    fixation_sequence : eyekit FixationSequence.
    screen_size : (width, height) of the drawing canvas.
    measure : name of a function in ``eyekit.measure``.
    use_characters : annotate characters instead of words when True.

    Side effects: writes "multiline_passage_piccol.png" to disk.
    """
    # Fix: a prior call to eyekit_plot() whose result was immediately
    # overwritten has been removed — it only wasted a render and wrote an
    # unused temp image.
    eyekitplot_img = ek.vis.Image(*screen_size)
    eyekitplot_img.draw_text_block(textblock)
    if use_characters:
        measure_results = getattr(ek.measure, measure)(textblock.characters(), fixation_sequence)
        enum = textblock.characters()
    else:
        measure_results = getattr(ek.measure, measure)(textblock.words(), fixation_sequence)
        enum = textblock.words()
    for unit in enum:
        eyekitplot_img.draw_rectangle(unit, color="lightseagreen")
        # Place the label just above the unit's bottom-right corner.
        x = unit.onset
        y = unit.y_br - 3
        label = f"{measure_results[unit.id]}"
        eyekitplot_img.draw_annotation((x, y), label, color="lightseagreen", font_face="Arial bold", font_size=15)
    eyekitplot_img.draw_fixation_sequence(fixation_sequence, color="gray")
    eyekitplot_img.save("multiline_passage_piccol.png", crop_margin=100)
    img_png = Image.open("multiline_passage_piccol.png")
    return img_png
142
+
143
+
144
def get_eyekit_measures(_txt, _seq, get_char_measures=False):
    """Compute all eyekit reading measures in MEASURES_DICT for every word
    (and optionally every character) of a text block.

    Returns (word_measures_df, character_measures_df); the second element is
    None unless ``get_char_measures`` is True. Both frames lead with the
    index column ("word_number" / "char_number") and the text column.
    """
    word_measures = copy.deepcopy(MEASURES_DICT)
    words = []
    for word in _txt.words():
        words.append(word.text)
        for measure_name in word_measures:
            word_measures[measure_name].append(getattr(ek.measure, measure_name)(word, _seq))
    word_measures_df = pd.DataFrame(word_measures)
    word_measures_df["word_number"] = np.arange(0, len(words))
    word_measures_df["word"] = words

    # Lead with the identifier columns, keeping measures in dict order.
    word_measures_df = word_measures_df[["word_number", "word", *word_measures.keys()]]

    if get_char_measures:
        char_measures = copy.deepcopy(MEASURES_DICT)

        characters = []
        for char in _txt.characters():
            characters.append(char.text)
            for measure_name in char_measures:
                char_measures[measure_name].append(getattr(ek.measure, measure_name)(char, _seq))
        character_measures_df = pd.DataFrame(char_measures)
        character_measures_df["char_number"] = np.arange(0, len(characters))
        character_measures_df["character"] = characters

        character_measures_df = character_measures_df[["char_number", "character", *char_measures.keys()]]
    else:
        character_measures_df = None
    return word_measures_df, character_measures_df
loss_functions.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch as t
2
+
3
+
4
def macro_soft_f1(real_vals, predictions, reduction):
    """Soft (differentiable) macro-F1 loss: 1 - soft F1 per class.

    Based on https://towardsdatascience.com/the-unknown-benefits-of-using-a-soft-f1-loss-in-classification-systems-753902c0105d

    Parameters
    ----------
    real_vals : 0/1 target tensor, shape (batch, num_classes).
    predictions : predicted probabilities, same shape.
    reduction : "mean" averages over classes; any other value returns the
        per-class loss vector.
    """
    tp = (real_vals * predictions).sum(dim=0)
    fp = (predictions * (1 - real_vals)).sum(dim=0)
    fn = ((1 - predictions) * real_vals).sum(dim=0)
    # Epsilon keeps the ratio defined when a class has no support at all.
    soft_f1 = 2 * tp / (2 * tp + fn + fp + 1e-16)
    per_class_loss = 1 - soft_f1
    return t.mean(per_class_loss) if reduction == "mean" else per_class_loss
15
+
16
+
17
def coral_loss(logits, levels, importance_weights=None, reduction="mean"):
    """CORAL ordinal-regression loss.

    Cao, Mirjalili, and Raschka (2020), *Rank Consistent Ordinal Regression
    for Neural Networks with Application to Age Estimation*, Pattern
    Recognition Letters, https://doi.org/10.1016/j.patrec.2020.11.008
    Adapted from
    https://github.com/Raschka-research-group/coral-pytorch/blob/c6ab93afd555a6eac708c95ae1feafa15f91c5aa/coral_pytorch/losses.py

    Parameters
    ----------
    logits : torch.tensor, shape (num_examples, num_classes-1)
        Outputs of the CORAL layer.
    levels : torch.tensor, same shape
        True labels as extended binary vectors.
    importance_weights : torch.tensor, shape (num_classes-1,), optional
        Per-threshold weights; None is equivalent to all-ones.
    reduction : "mean", "sum", or None
        How to reduce the per-example losses.

    Returns
    -------
    torch.tensor : scalar loss ("mean"/"sum") or per-example vector (None).

    Raises
    ------
    ValueError : on shape mismatch or an unknown ``reduction``.
    """
    if logits.shape != levels.shape:
        raise ValueError(
            "Please ensure that logits (%s) has the same shape as levels (%s). " % (logits.shape, levels.shape)
        )

    # log(sigmoid(z)) for positive thresholds; log(1-sigmoid(z)) = logsigmoid(z)-z
    # for negative ones — numerically stable form of binary cross-entropy.
    log_sig = t.nn.functional.logsigmoid(logits)
    term1 = log_sig * levels + (log_sig - logits) * (1 - levels)

    if importance_weights is not None:
        term1 *= importance_weights

    per_example = -t.sum(term1, dim=1)

    if reduction == "mean":
        return t.mean(per_example)
    if reduction == "sum":
        return t.sum(per_example)
    if reduction is None:
        return per_example
    raise ValueError('Invalid value for `reduction`. Should be "mean", ' '"sum", or None. Got %s' % reduction)
84
+
85
+
86
def corn_loss(logits, y_train, num_classes):
    """CORN (Conditional Ordinal Regression for Neural networks) loss.

    Implements the conditional training-set scheme: threshold task k is
    trained only on examples with label > k-1, with binary target
    "label > k". Adapted from
    https://github.com/Raschka-research-group/coral-pytorch/blob/c6ab93afd555a6eac708c95ae1feafa15f91c5aa/coral_pytorch/losses.py

    Parameters
    ----------
    logits : torch.tensor, shape (num_examples, num_classes-1)
        Outputs of the CORN layer.
    y_train : torch.tensor, shape (num_examples,)
        Integer class labels starting at 0.
    num_classes : int
        Number of distinct class labels.

    Returns
    -------
    torch.tensor : scalar loss averaged over all conditional examples.
    """
    total_loss = 0.0
    total_examples = 0
    for task_index in range(num_classes - 1):
        # Conditional subset: only examples whose label exceeds task_index-1.
        mask = y_train > task_index - 1
        labels = (y_train[mask] > task_index).to(t.int64)

        if len(labels) < 1:
            continue

        total_examples += len(labels)
        pred = logits[mask, task_index]

        # Stable binary cross-entropy on the raw logits.
        log_sig = t.nn.functional.logsigmoid(pred)
        total_loss += -t.sum(log_sig * labels + (log_sig - pred) * (1 - labels))

    return total_loss / total_examples
151
+
152
+
153
def corn_label_from_logits(logits):
    """Decode predicted rank labels from CORN logits.

    Each column of ``logits`` is a conditional threshold; the unconditional
    probability of exceeding threshold k is the cumulative product of the
    sigmoids up to k. The predicted rank is the number of thresholds whose
    probability exceeds 0.5. Adapted from
    https://github.com/Raschka-research-group/coral-pytorch/blob/c6ab93afd555a6eac708c95ae1feafa15f91c5aa/coral_pytorch/dataset.py

    Parameters
    ----------
    logits : torch.tensor, shape (n_examples, n_classes-1)

    Returns
    -------
    torch.tensor, shape (n_examples,) : integer rank labels.
    """
    cumulative_probas = t.cumprod(t.sigmoid(logits), dim=1)
    return t.sum(cumulative_probas > 0.5, dim=1)
models.py ADDED
@@ -0,0 +1,897 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import timm
2
+ import os
3
+ from typing import Any
4
+ from pytorch_lightning.utilities.types import LRSchedulerTypeUnion
5
+ import torch as t
6
+ from torch import nn
7
+ import numpy as np
8
+ import transformers
9
+ import pytorch_lightning as plight
10
+ import torchmetrics
11
+ import einops as eo
12
+ from loss_functions import coral_loss, corn_loss, corn_label_from_logits, macro_soft_f1
13
+
14
+ t.set_float32_matmul_precision("medium")
15
+ global_settings = dict(try_using_torch_compile=False)
16
+
17
+
18
class EnsembleModel(plight.LightningModule):
    """Ensemble over two groups of trained models: one group fed the
    unnormalized batch, the other the normalized batch.

    Outputs are merged either through a learned linear combiner over the
    concatenated per-model class outputs, or by a simple average when
    ``use_simple_average=True``.
    """

    def __init__(self, models_without_norm_df, models_with_norm_df, learning_rate=0.0002, use_simple_average=False):
        super().__init__()
        # Wrap the member models in ModuleLists so their parameters register.
        self.models_without_norm = nn.ModuleList(list(models_without_norm_df))
        self.models_with_norm = nn.ModuleList(list(models_with_norm_df))
        self.learning_rate = learning_rate
        self.use_simple_average = use_simple_average

        if not self.use_simple_average:
            # Learned combiner: concatenated outputs of every member -> one
            # set of class logits (assumes all members share num_classes).
            self.combiner = nn.Linear(
                self.models_with_norm[0].num_classes * (len(self.models_with_norm) + len(self.models_without_norm)),
                self.models_with_norm[0].num_classes,
            )

    def forward(self, x):
        # x is a pair: (batch for unnormalized models, batch for normalized
        # models). x_unnormed[-1] is assumed to hold the targets — TODO confirm.
        x_unnormed, x_normed = x
        if not self.use_simple_average:
            out_unnormed = t.cat([model.model_step(x_unnormed, 0)[0] for model in self.models_without_norm], dim=-1)
            out_normed = t.cat([model.model_step(x_normed, 0)[0] for model in self.models_with_norm], dim=-1)
            out_avg = self.combiner(t.cat((out_unnormed, out_normed), dim=-1))
        else:
            out_unnormed = [model.model_step(x_unnormed, 0)[0] for model in self.models_without_norm]
            out_normed = [model.model_step(x_normed, 0)[0] for model in self.models_with_norm]

            # NOTE(review): .mean(-1) over the stacked outputs already averages
            # across members, so the extra division by 2 halves the result —
            # confirm this is intentional (argmax-based preds are unaffected).
            out_avg = (t.stack(out_unnormed + out_normed, dim=-1) / 2).mean(-1)
        return {"out_avg": out_avg, "out_unnormed": out_unnormed, "out_normed": out_normed}, x_unnormed[-1]

    def training_step(self, batch, batch_idx):
        out, y = self(batch)
        # Reuse the first normalized member's loss on the ensemble output.
        loss = self.models_with_norm[0]._get_loss(out["out_avg"], y, batch[0])
        self.log("train_loss", loss, on_epoch=True, on_step=True, sync_dist=True)
        return loss

    def validation_step(self, batch, batch_idx):
        out, y = self(batch)
        # Delegate pred/target post-processing to the first normalized member.
        preds, y_onecold, ignore_index_val = self.models_with_norm[0]._get_preds_reals(out["out_avg"], y)
        acc = torchmetrics.functional.accuracy(
            preds,
            y_onecold.to(t.long),
            ignore_index=ignore_index_val,
            num_classes=self.models_with_norm[0].num_classes,
            task="multiclass",
        )
        self.log("acc", acc * 100, prog_bar=True, sync_dist=True)
        loss = self.models_with_norm[0]._get_loss(out["out_avg"], y, batch[0])
        self.log("val_loss", loss, prog_bar=True, sync_dist=True)
        return loss

    def predict_step(self, batch, batch_idx: int, dataloader_idx: int = 0):
        # Returns (class predictions, raw output dict, one-cold targets).
        out, y = self(batch)
        preds, y_onecold, ignore_index_val = self.models_with_norm[0]._get_preds_reals(out["out_avg"], y)
        return preds, out, y_onecold

    def configure_optimizers(self):
        # Plain Adam over all ensemble parameters (members + combiner).
        return t.optim.Adam(self.parameters(), lr=self.learning_rate)
73
+
74
+
75
class TimmHeadReplace(nn.Module):
    """Replacement head for a timm backbone: optional 2D adaptive pooling
    followed by a flatten, or a pure pass-through (``all_identity=True``)."""

    def __init__(self, pooling=None, in_channels=512, pooling_output_dimension=1, all_identity=False) -> None:
        super().__init__()

        if all_identity:
            # Pass features through unchanged (used to strip timm's own head).
            self.head = nn.Identity()
            self.pooling = None
        else:
            self.pooling = pooling
            if pooling is not None:
                self.pooling_output_dimension = pooling_output_dimension
                if self.pooling == "AdaptiveAvgPool2d":
                    self.pooling_layer = nn.AdaptiveAvgPool2d(pooling_output_dimension)
                elif self.pooling == "AdaptiveMaxPool2d":
                    self.pooling_layer = nn.AdaptiveMaxPool2d(pooling_output_dimension)
            self.head = nn.Flatten()

    def forward(self, x, pre_logits=False):
        # NOTE(review): `pre_logits` mirrors the timm head API but is unused here.
        if self.pooling is not None:
            if self.pooling == "stack_avg_max_attn":
                # NOTE(review): this branch iterates self.pooling_layer as a
                # collection, but __init__ never builds pooling_layer for this
                # value — it would raise AttributeError if reached; confirm
                # whether a caller assigns it externally.
                x = t.cat([layer(x) for layer in self.pooling_layer], dim=-1)
            else:
                x = self.pooling_layer(x)
        return self.head(x)
99
+
100
+
101
class CVModel(nn.Module):
    """Wraps a pretrained timm vision backbone to predict a per-fixation
    sequence of outputs from a plotted trial image.

    The backbone's feature vector is mapped to ``max_seq_length`` values,
    then each value is projected to ``num_classes`` logits (or kept scalar
    for the ordinal-regression loss).
    """

    def __init__(
        self,
        modelname,
        in_shape,
        num_classes,
        loss_func,
        last_activation: str,
        input_padding_val=10,
        char_dims=2,
        max_seq_length=1000,
    ) -> None:
        super().__init__()
        self.modelname = modelname
        self.loss_func = loss_func
        self.in_shape = in_shape
        self.char_dims = char_dims
        self.x_shape = in_shape
        self.last_activation = last_activation
        self.max_seq_length = max_seq_length
        self.num_classes = num_classes
        # OrdinalRegLoss regresses a single scalar per position instead of
        # per-class logits.
        if self.loss_func == "OrdinalRegLoss":
            self.out_shape = 1
        else:
            self.out_shape = num_classes

        # num_classes=0 asks timm for the headless feature extractor.
        self.cv_model = timm.create_model(modelname, pretrained=True, num_classes=0)
        self.cv_model.classifier = nn.Identity()
        # Probe the backbone once to discover its feature dimension.
        with t.inference_mode():
            test_out = self.cv_model(t.ones(self.in_shape, dtype=t.float32))
        self.cv_model_out_dim = test_out.shape[1]
        # NOTE(review): `classifier` is attached as an attribute of cv_model
        # but applied manually in forward(); whether timm's own forward also
        # applies it depends on the architecture — confirm for `modelname`.
        self.cv_model.classifier = nn.Sequential(nn.Flatten(), nn.Linear(self.cv_model_out_dim, self.max_seq_length))
        if self.out_shape == 1:
            self.logit_norm = nn.Identity()
            self.out_project = nn.Identity()
        else:
            self.logit_norm = nn.LayerNorm(self.max_seq_length)
            self.out_project = nn.Linear(1, self.out_shape)

        if last_activation == "Softmax":
            self.final_activation = nn.Softmax(dim=-1)
        elif last_activation == "Sigmoid":
            self.final_activation = nn.Sigmoid()
        elif last_activation == "LogSigmoid":
            self.final_activation = nn.LogSigmoid()
        elif last_activation == "Identity":
            self.final_activation = nn.Identity()
        else:
            raise NotImplementedError(f"{last_activation} not implemented")

    def forward(self, x):
        # Some dataloaders hand over [image, ...]; only the image is used.
        if isinstance(x, list):
            x = x[0]
        x = self.cv_model(x)
        # Map features to one value per sequence position, then project each
        # position to the output shape.
        x = self.cv_model.classifier(x).unsqueeze(-1)
        x = self.out_project(x)
        return self.final_activation(x)
158
+
159
+
160
+ class LitModel(plight.LightningModule):
161
+ def __init__(
162
+ self,
163
+ in_shape: tuple,
164
+ hidden_dim: int,
165
+ num_attention_heads: int,
166
+ num_layers: int,
167
+ loss_func: str,
168
+ learning_rate: float,
169
+ weight_decay: float,
170
+ cfg: dict,
171
+ use_lr_warmup: bool,
172
+ use_reduce_on_plateau: bool,
173
+ track_gradient_histogram=False,
174
+ register_forw_hook=False,
175
+ char_dims=2,
176
+ ) -> None:
177
+ super().__init__()
178
+ if "only_use_2nd_input_stream" not in cfg:
179
+ cfg["only_use_2nd_input_stream"] = False
180
+
181
+ if "gamma_step_size" not in cfg:
182
+ cfg["gamma_step_size"] = 5
183
+ if "gamma_step_factor" not in cfg:
184
+ cfg["gamma_step_factor"] = 0.5
185
+ self.save_hyperparameters(
186
+ dict(
187
+ in_shape=in_shape,
188
+ hidden_dim=hidden_dim,
189
+ num_attention_heads=num_attention_heads,
190
+ num_layers=num_layers,
191
+ loss_func=loss_func,
192
+ learning_rate=learning_rate,
193
+ cfg=cfg,
194
+ x_shape=in_shape,
195
+ num_classes=cfg["num_classes"],
196
+ use_lr_warmup=use_lr_warmup,
197
+ num_warmup_steps=cfg["num_warmup_steps"],
198
+ use_reduce_on_plateau=use_reduce_on_plateau,
199
+ weight_decay=weight_decay,
200
+ track_gradient_histogram=track_gradient_histogram,
201
+ register_forw_hook=register_forw_hook,
202
+ char_dims=char_dims,
203
+ remove_timm_classifier_head_pooling=cfg["remove_timm_classifier_head_pooling"],
204
+ change_pooling_for_timm_head_to=cfg["change_pooling_for_timm_head_to"],
205
+ chars_conv_pooling_out_dim=cfg["chars_conv_pooling_out_dim"],
206
+ )
207
+ )
208
+ self.model_to_use = cfg["model_to_use"]
209
+ self.num_classes = cfg["num_classes"]
210
+ self.x_shape = in_shape
211
+ self.in_shape = in_shape
212
+ self.hidden_dim = hidden_dim
213
+ self.num_attention_heads = num_attention_heads
214
+ self.num_layers = num_layers
215
+
216
+ self.use_lr_warmup = use_lr_warmup
217
+ self.num_warmup_steps = cfg["num_warmup_steps"]
218
+ self.warmup_exponent = cfg["warmup_exponent"]
219
+
220
+ self.use_reduce_on_plateau = use_reduce_on_plateau
221
+ self.loss_func = loss_func
222
+ self.learning_rate = learning_rate
223
+ self.weight_decay = weight_decay
224
+ self.using_one_hot_targets = cfg["one_hot_y"]
225
+ self.track_gradient_histogram = track_gradient_histogram
226
+ self.register_forw_hook = register_forw_hook
227
+ if self.loss_func == "OrdinalRegLoss":
228
+ self.ord_reg_loss_max = cfg["ord_reg_loss_max"]
229
+ self.ord_reg_loss_min = cfg["ord_reg_loss_min"]
230
+
231
+ self.num_lin_layers = cfg["num_lin_layers"]
232
+ self.linear_activation = cfg["linear_activation"]
233
+ self.last_activation = cfg["last_activation"]
234
+
235
+ self.max_seq_length = cfg["manual_max_sequence_for_model"]
236
+
237
+ self.use_char_embed_info = cfg["use_embedded_char_pos_info"]
238
+
239
+ self.method_chars_into_model = cfg["method_chars_into_model"]
240
+ self.source_for_pretrained_cv_model = cfg["source_for_pretrained_cv_model"]
241
+ self.method_to_include_char_positions = cfg["method_to_include_char_positions"]
242
+
243
+ self.char_dims = char_dims
244
+ self.char_sequence_length = cfg["max_len_chars_list"] if self.use_char_embed_info else 0
245
+
246
+ self.chars_conv_lr_reduction_factor = cfg["chars_conv_lr_reduction_factor"]
247
+ if self.use_char_embed_info:
248
+ self.chars_bert_reduction_factor = cfg["chars_bert_reduction_factor"]
249
+
250
+ self.use_in_projection_bias = cfg["use_in_projection_bias"]
251
+ self.add_layer_norm_to_in_projection = cfg["add_layer_norm_to_in_projection"]
252
+
253
+ self.hidden_dropout_prob = cfg["hidden_dropout_prob"]
254
+ self.layer_norm_after_in_projection = cfg["layer_norm_after_in_projection"]
255
+ self.method_chars_into_model = cfg["method_chars_into_model"]
256
+ self.input_padding_val = cfg["input_padding_val"]
257
+ self.cv_char_modelname = cfg["cv_char_modelname"]
258
+ self.char_plot_shape = cfg["char_plot_shape"]
259
+
260
+ self.remove_timm_classifier_head_pooling = cfg["remove_timm_classifier_head_pooling"]
261
+ self.change_pooling_for_timm_head_to = cfg["change_pooling_for_timm_head_to"]
262
+ self.chars_conv_pooling_out_dim = cfg["chars_conv_pooling_out_dim"]
263
+
264
+ self.add_layer_norm_to_char_mlp = cfg["add_layer_norm_to_char_mlp"]
265
+ if "profile_torch_run" in cfg:
266
+ self.profile_torch_run = cfg["profile_torch_run"]
267
+ else:
268
+ self.profile_torch_run = False
269
+ if self.loss_func == "OrdinalRegLoss":
270
+ self.out_shape = 1
271
+ else:
272
+ self.out_shape = cfg["num_classes"]
273
+
274
+ if not self.hparams.cfg["only_use_2nd_input_stream"]:
275
+ if (
276
+ self.method_chars_into_model == "dense"
277
+ and self.use_char_embed_info
278
+ and self.method_to_include_char_positions == "concat"
279
+ ):
280
+ self.project = nn.Linear(self.x_shape[-1], self.hidden_dim // 2, bias=self.use_in_projection_bias)
281
+ elif (
282
+ self.method_chars_into_model == "bert"
283
+ and self.use_char_embed_info
284
+ and self.method_to_include_char_positions == "concat"
285
+ ):
286
+ self.hidden_dim_chars = self.hidden_dim // 2
287
+ self.project = nn.Linear(self.x_shape[-1], self.hidden_dim_chars, bias=self.use_in_projection_bias)
288
+ elif (
289
+ self.method_chars_into_model == "resnet"
290
+ and self.method_to_include_char_positions == "concat"
291
+ and self.use_char_embed_info
292
+ ):
293
+ self.project = nn.Linear(self.x_shape[-1], self.hidden_dim // 2, bias=self.use_in_projection_bias)
294
+ elif self.model_to_use == "cv_only_model":
295
+ self.project = nn.Identity()
296
+ else:
297
+ self.project = nn.Linear(self.x_shape[-1], self.hidden_dim, bias=self.use_in_projection_bias)
298
+ if self.add_layer_norm_to_in_projection:
299
+ self.project = nn.Sequential(
300
+ nn.Linear(self.project.in_features, self.project.out_features, bias=self.use_in_projection_bias),
301
+ nn.LayerNorm(self.project.out_features),
302
+ )
303
+
304
+ if hasattr(self, "project") and "posix" in os.name and global_settings["try_using_torch_compile"]:
305
+ self.project = t.compile(self.project)
306
+
307
+ if self.use_char_embed_info:
308
+ self._create_char_model()
309
+
310
+ if self.layer_norm_after_in_projection:
311
+ if self.hparams.cfg["only_use_2nd_input_stream"]:
312
+ self.layer_norm_in = nn.LayerNorm(self.hidden_dim // 2)
313
+ else:
314
+ self.layer_norm_in = nn.LayerNorm(self.hidden_dim)
315
+
316
+ if "posix" in os.name and global_settings["try_using_torch_compile"]:
317
+ self.layer_norm_in = t.compile(self.layer_norm_in)
318
+
319
+ self._create_main_seq_model(cfg)
320
+
321
+ if register_forw_hook:
322
+ self.register_hooks()
323
+ if self.hparams.cfg["only_use_2nd_input_stream"]:
324
+ linear_in_dim = self.hidden_dim // 2
325
+ else:
326
+ linear_in_dim = self.hidden_dim
327
+
328
+ if self.num_lin_layers == 1:
329
+ self.linear = nn.Linear(linear_in_dim, self.out_shape)
330
+ else:
331
+ lin_layers = []
332
+ for _ in range(self.num_lin_layers - 1):
333
+ lin_layers.extend(
334
+ [
335
+ nn.Linear(linear_in_dim, linear_in_dim),
336
+ getattr(nn, self.linear_activation)(),
337
+ ]
338
+ )
339
+ self.linear = nn.Sequential(*lin_layers, nn.Linear(linear_in_dim, self.out_shape))
340
+
341
+ if "posix" in os.name and global_settings["try_using_torch_compile"]:
342
+ self.linear = t.compile(self.linear)
343
+
344
+ if self.last_activation == "Softmax":
345
+ self.final_activation = nn.Softmax(dim=-1)
346
+ elif self.last_activation == "Sigmoid":
347
+ self.final_activation = nn.Sigmoid()
348
+ elif self.last_activation == "Identity":
349
+ self.final_activation = nn.Identity()
350
+ else:
351
+ raise NotImplementedError(f"{self.last_activation} not implemented")
352
+
353
+ if self.profile_torch_run:
354
+ self.profilerr = t.profiler.profile(
355
+ schedule=t.profiler.schedule(wait=1, warmup=10, active=10, repeat=1),
356
+ on_trace_ready=t.profiler.tensorboard_trace_handler("tblogs"),
357
+ with_stack=True,
358
+ record_shapes=True,
359
+ profile_memory=False,
360
+ )
361
+
362
+ def _create_main_seq_model(self, cfg):
363
+ if self.hparams.cfg["only_use_2nd_input_stream"]:
364
+ hidden_dim = self.hidden_dim // 2
365
+ else:
366
+ hidden_dim = self.hidden_dim
367
+ if self.model_to_use == "BERT":
368
+ self.bert_config = transformers.BertConfig(
369
+ vocab_size=self.x_shape[-1],
370
+ hidden_size=hidden_dim,
371
+ num_hidden_layers=self.num_layers,
372
+ intermediate_size=hidden_dim,
373
+ num_attention_heads=self.num_attention_heads,
374
+ max_position_embeddings=self.max_seq_length,
375
+ )
376
+ self.bert_model = transformers.BertModel(self.bert_config)
377
+ elif self.model_to_use == "cv_only_model":
378
+ self.bert_model = CVModel(
379
+ modelname=cfg["cv_modelname"],
380
+ in_shape=self.in_shape,
381
+ num_classes=cfg["num_classes"],
382
+ loss_func=cfg["loss_function"],
383
+ last_activation=cfg["last_activation"],
384
+ input_padding_val=cfg["input_padding_val"],
385
+ char_dims=self.char_dims,
386
+ max_seq_length=cfg["manual_max_sequence_for_model"],
387
+ )
388
+ else:
389
+ raise NotImplementedError(f"{self.model_to_use} not implemented")
390
+ if "posix" in os.name and global_settings["try_using_torch_compile"]:
391
+ self.bert_model = t.compile(self.bert_model)
392
+ return 0
393
+
394
def _create_char_model(self):
    """Build the sub-network that turns character/word-position information
    into an embedding merged with the fixation-sequence stream.

    The strategy is selected by ``self.method_chars_into_model``:
      - "dense":  two small linear projections over the char-coordinate grid
      - "bert":   a reduced-width BertForSequenceClassification whose CLS
                  output summarises the char sequence
      - "resnet": a pretrained CV backbone applied to a rendered image of
                  the character layout
    Returns 0 (sentinel success value, mirroring the other _create_* methods).
    """
    if self.method_chars_into_model == "dense":
        # Project each char's feature vector down to a single scalar.
        self.chars_project_0 = nn.Linear(self.char_dims, 1, bias=self.use_in_projection_bias)
        if "posix" in os.name and global_settings["try_using_torch_compile"]:
            self.chars_project_0 = t.compile(self.chars_project_0)
        if self.method_to_include_char_positions == "concat":
            # Half the hidden width: the other half is the fixation embedding.
            self.chars_project_1 = nn.Linear(
                self.char_sequence_length, self.hidden_dim // 2, bias=self.use_in_projection_bias
            )
        else:
            self.chars_project_1 = nn.Linear(
                self.char_sequence_length, self.hidden_dim, bias=self.use_in_projection_bias
            )

        if "posix" in os.name and global_settings["try_using_torch_compile"]:
            self.chars_project_1 = t.compile(self.chars_project_1)
    elif not self.method_chars_into_model == "resnet":
        # Non-dense, non-resnet path (i.e. "bert"): project raw char features
        # to the reduced char hidden size.
        # NOTE(review): this reads self.hidden_dim_chars before the fallback
        # computation below; it appears to rely on the attribute having been
        # set elsewhere (e.g. in __init__) — confirm.
        self.chars_project = nn.Linear(self.char_dims, self.hidden_dim_chars, bias=self.use_in_projection_bias)
        if "posix" in os.name and global_settings["try_using_torch_compile"]:
            self.chars_project = t.compile(self.chars_project)

    if self.method_chars_into_model == "bert":
        if not hasattr(self, "hidden_dim_chars"):
            # Shrink the char-BERT relative to the main model, but never to a
            # width of 1 or less.
            if self.hidden_dim // self.chars_bert_reduction_factor > 1:
                self.hidden_dim_chars = self.hidden_dim // self.chars_bert_reduction_factor
            else:
                self.hidden_dim_chars = self.hidden_dim
        # Keep the per-head width identical to the main model's heads.
        self.num_attention_heads_chars = self.hidden_dim_chars // (self.hidden_dim // self.num_attention_heads)
        self.chars_bert_config = transformers.BertConfig(
            vocab_size=self.x_shape[-1],
            hidden_size=self.hidden_dim_chars,
            num_hidden_layers=self.num_layers,
            intermediate_size=self.hidden_dim_chars,
            num_attention_heads=self.num_attention_heads_chars,
            max_position_embeddings=self.char_sequence_length + 1,  # +1 for the CLS slot
            num_labels=1,
        )
        self.chars_bert = transformers.BertForSequenceClassification(self.chars_bert_config)

        if "posix" in os.name and global_settings["try_using_torch_compile"]:
            self.chars_bert = t.compile(self.chars_bert)
        # Expand the scalar classification output back to the char hidden size.
        self.chars_project_class_output = nn.Linear(1, self.hidden_dim_chars, bias=self.use_in_projection_bias)
        if "posix" in os.name and global_settings["try_using_torch_compile"]:
            self.chars_project_class_output = t.compile(self.chars_project_class_output)
    elif self.method_chars_into_model == "resnet":
        if self.source_for_pretrained_cv_model == "timm":
            self.chars_conv = timm.create_model(
                self.cv_char_modelname,
                pretrained=True,
                num_classes=0,  # remove classifier nn.Linear
            )
            if self.remove_timm_classifier_head_pooling:
                # Strip timm's pooling head, then probe the output shape to
                # decide whether a replacement pooling layer is needed.
                self.chars_conv.head = TimmHeadReplace(all_identity=True)
                with t.inference_mode():
                    test_out = self.chars_conv(
                        t.ones((1, 3, self.char_plot_shape[0], self.char_plot_shape[1]), dtype=t.float32)
                    )
                if test_out.ndim > 3:
                    self.chars_conv.head = TimmHeadReplace(
                        self.change_pooling_for_timm_head_to,
                        test_out.shape[1],
                    )
        elif self.source_for_pretrained_cv_model == "huggingface":
            self.chars_conv = transformers.AutoModelForImageClassification.from_pretrained(self.cv_char_modelname)
        elif self.source_for_pretrained_cv_model == "torch_hub":
            # cv_char_modelname encodes "repo,model" for torch.hub.load.
            self.chars_conv = t.hub.load(*self.cv_char_modelname.split(","))

        # Drop whatever classification head the backbone came with so it
        # yields features instead of class logits.
        if hasattr(self.chars_conv, "classifier"):
            self.chars_conv.classifier = nn.Identity()
        elif hasattr(self.chars_conv, "cls_classifier"):
            self.chars_conv.cls_classifier = nn.Identity()
        elif hasattr(self.chars_conv, "fc"):
            self.chars_conv.fc = nn.Identity()

        if hasattr(self.chars_conv, "distillation_classifier"):
            self.chars_conv.distillation_classifier = nn.Identity()
        # Probe the backbone once to learn its output feature dimensionality.
        with t.inference_mode():
            test_out = self.chars_conv(
                t.ones((1, 3, self.char_plot_shape[0], self.char_plot_shape[1]), dtype=t.float32)
            )
        if hasattr(test_out, "last_hidden_state"):
            self.chars_conv_out_dim = test_out.last_hidden_state.shape[1]
        elif hasattr(test_out, "logits"):
            self.chars_conv_out_dim = test_out.logits.shape[1]
        elif isinstance(test_out, list):
            self.chars_conv_out_dim = test_out[0].shape[1]
        else:
            self.chars_conv_out_dim = test_out.shape[1]

        # Map backbone features to half the hidden width (the other half is
        # the fixation embedding when concatenating).
        char_lin_layers = [nn.Flatten(), nn.Linear(self.chars_conv_out_dim, self.hidden_dim // 2)]
        if self.add_layer_norm_to_char_mlp:
            char_lin_layers.append(nn.LayerNorm(self.hidden_dim // 2))
        self.chars_classifier = nn.Sequential(*char_lin_layers)
        if hasattr(self.chars_conv, "distillation_classifier"):
            self.chars_conv.distillation_classifier = nn.Sequential(
                nn.Flatten(), nn.Linear(self.chars_conv_out_dim, self.hidden_dim // 2)
            )

        if "posix" in os.name and global_settings["try_using_torch_compile"]:
            self.chars_classifier = t.compile(self.chars_classifier)
        if "posix" in os.name and global_settings["try_using_torch_compile"]:
            self.chars_conv = t.compile(self.chars_conv)
    return 0
497
+
498
def register_hooks(self):
    """Attach a forward hook to every sub-module that logs its activations
    as TensorBoard histograms (only for loggers exposing ``add_histogram``).

    Source was diff-mangled; reconstructed as runnable Python.
    """

    def _make_activation_hook(tag_prefix):
        # Closure over ``tag_prefix`` so every module logs under its own tag.
        def _hook(module, inputs, output):
            # Plain tensors have .detach(); tuple/None outputs are skipped.
            if hasattr(output, "detach"):
                for logger in self.loggers:
                    if hasattr(logger.experiment, "add_histogram"):
                        logger.experiment.add_histogram(
                            tag=f"{tag_prefix}_{str(list(output.shape))}",
                            values=output.detach(),
                            global_step=self.trainer.global_step,
                        )

        return _hook

    for module_name, module in self.named_modules():
        module.register_forward_hook(_make_activation_hook(f"act_{module_name}"))
514
+
515
def on_after_backward(self) -> None:
    """Lightning hook: after backward, optionally log gradient histograms.

    Only active when ``self.track_gradient_histogram`` is set, and throttled
    to every 200th global step to keep logging overhead low.
    """
    if self.track_gradient_histogram:
        if self.trainer.global_step % 200 == 0:
            for logger in self.loggers:
                # Only TensorBoard-style loggers expose add_histogram.
                if hasattr(logger.experiment, "add_histogram"):
                    for layer_id, layer in dict([*self.named_modules()]).items():
                        parameters = layer.parameters()
                        for idx2, p in enumerate(parameters):
                            grad_val = p.grad
                            # Params without a gradient (frozen/unused) are skipped.
                            if grad_val is not None:
                                grad_name = f"grad_{idx2}_{layer_id}_{str(list(p.grad.shape))}"
                                logger.experiment.add_histogram(
                                    tag=grad_name, values=grad_val, global_step=self.trainer.global_step
                                )

    return super().on_after_backward()
531
+
532
+ def _fold_in_seq_dim(self, out, y):
533
+ batch_size, seq_len, num_classes = out.shape
534
+ out = eo.rearrange(out, "b s c -> (b s) c", s=seq_len)
535
+ if y is None:
536
+ return out, None
537
+ if len(y.shape) > 2:
538
+ y = eo.rearrange(y, "b s c -> (b s) c", s=seq_len)
539
+ else:
540
+ y = eo.rearrange(y, "b s -> (b s)", s=seq_len)
541
+ return out, y
542
+
543
+ def _get_loss(self, out, y, batch):
544
+ attention_mask = batch[-2]
545
+ if self.loss_func == "BCELoss":
546
+ if self.last_activation == "Identity":
547
+ loss = t.nn.functional.binary_cross_entropy_with_logits(out, y, reduction="none")
548
+ else:
549
+ loss = t.nn.functional.binary_cross_entropy(out, y, reduction="none")
550
+
551
+ replace_tensor = t.zeros(loss[1, 1, :].shape, device=loss.device, dtype=loss.dtype, requires_grad=False)
552
+ loss[~attention_mask.bool()] = replace_tensor
553
+ loss = loss.mean()
554
+ elif self.loss_func == "CrossEntropyLoss":
555
+ if len(out.shape) > 2:
556
+ out, y = self._fold_in_seq_dim(out, y)
557
+ loss = t.nn.functional.cross_entropy(out, y, reduction="mean", ignore_index=-100)
558
+ else:
559
+ loss = t.nn.functional.cross_entropy(out, y, reduction="mean", ignore_index=-100)
560
+
561
+ elif self.loss_func == "OrdinalRegLoss":
562
+ loss = t.nn.functional.mse_loss(out, y, reduction="none")
563
+ loss = loss[attention_mask.bool()].sum() * 10.0 / attention_mask.sum()
564
+ elif self.loss_func == "macro_soft_f1":
565
+ loss = macro_soft_f1(y, out, reduction="mean")
566
+ elif self.loss_func == "coral_loss":
567
+ loss = coral_loss(out, y)
568
+ elif self.loss_func == "corn_loss":
569
+ out, y = self._fold_in_seq_dim(out, y)
570
+ loss = corn_loss(out, y.squeeze(), self.out_shape)
571
+ else:
572
+ raise ValueError("Loss Function not reckognized")
573
+ return loss
574
+
575
def training_step(self, batch, batch_idx):
    """One optimization step: forward pass, loss, and train-loss logging.

    Source was diff-mangled; reconstructed as runnable Python.
    """
    # Advance the torch profiler schedule when profiling is enabled.
    if self.profile_torch_run:
        self.profilerr.step()
    predictions, targets = self.model_step(batch, batch_idx)
    loss = self._get_loss(predictions, targets, batch)
    self.log("train_loss", loss, on_epoch=True, on_step=True, sync_dist=True)
    return loss
582
+
583
def forward(*args):
    # Delegates to the module-level ``forward`` function defined later in this
    # file: at call time the bare name ``forward`` resolves through module
    # globals, not to this method. ``args[0]`` is ``self``; the remaining
    # positional args are forwarded as a tuple, which ``prep_model_input``
    # unwraps when it has length 1.
    # NOTE(review): this name shadowing is fragile — renaming the module-level
    # function would turn this into infinite recursion. Confirm intended.
    return forward(args[0], args[1:])
585
+
586
def model_step(self, batch, batch_idx):
    """Run the forward pass; return (model output, targets).

    By convention the targets are the last element of the batch tuple.
    Source was diff-mangled; reconstructed as runnable Python.
    """
    predictions = self.forward(batch)
    targets = batch[-1]
    return predictions, targets
589
+
590
def optimizer_step(
    self,
    epoch,
    batch_idx,
    optimizer,
    optimizer_closure,
):
    """Lightning hook wrapping the optimizer update.

    Applies manual learning-rate warmup for the first
    ``self.num_warmup_steps`` global steps (skipped for OneCycleLR, which
    manages warmup itself), and periodically logs the lr of each param group.
    """
    optimizer.step(closure=optimizer_closure)

    if self.use_lr_warmup and self.hparams["cfg"]["lr_scheduling"] != "OneCycleLR":
        if self.trainer.global_step < self.num_warmup_steps:
            # Scale factor ramps from ~0 to 1, shaped by warmup_exponent.
            lr_scale = min(1.0, float(self.trainer.global_step + 1) / self.num_warmup_steps) ** self.warmup_exponent
            for pg in optimizer.param_groups:
                pg["lr"] = lr_scale * self.hparams.learning_rate
        # NOTE(review): nesting reconstructed from a diff rendering — this
        # logging block may originally have been at the outer level (i.e. also
        # active without warmup); confirm against the repository history.
        if self.trainer.global_step % 10 == 0 or self.trainer.global_step == 0:
            for idx, pg in enumerate(optimizer.param_groups):
                self.log(f"lr_{idx}", pg["lr"], prog_bar=True, sync_dist=True)
607
+
608
def lr_scheduler_step(self, scheduler: LRSchedulerTypeUnion, metric: Any | None) -> None:
    """Step the LR scheduler, holding it back while manual warmup is active.

    During manual warmup (used unless the schedule is OneCycleLR, which
    handles warmup itself) the scheduler is frozen until
    ``num_warmup_steps`` has been passed; otherwise it steps on every call,
    forwarding the monitored metric when one is provided.

    Fix: the original duplicated the metric/no-metric stepping logic in both
    branches; collapsed into a single guard + step.
    """
    in_manual_warmup = (
        self.use_lr_warmup
        and self.hparams["cfg"]["lr_scheduling"] != "OneCycleLR"
        and self.trainer.global_step <= self.num_warmup_steps
    )
    if in_manual_warmup:
        # optimizer_step's warmup owns the lr for now; skip scheduler stepping.
        return
    if metric is None:
        scheduler.step()
    else:
        scheduler.step(metric)
620
+
621
+ def _get_preds_reals(self, out, y):
622
+ if self.loss_func == "corn_loss":
623
+ seq_len = out.shape[1]
624
+ out, y = self._fold_in_seq_dim(out, y)
625
+ preds = corn_label_from_logits(out)
626
+ preds = eo.rearrange(preds, "(b s) -> b s", s=seq_len)
627
+ if y is not None:
628
+ y = eo.rearrange(y.squeeze(), "(b s) -> b s", s=seq_len)
629
+
630
+ elif self.loss_func == "OrdinalRegLoss":
631
+ preds = out * (self.ord_reg_loss_max - self.ord_reg_loss_min)
632
+ preds = (preds + self.ord_reg_loss_min).round().to(t.long)
633
+
634
+ else:
635
+ preds = t.argmax(out, dim=-1)
636
+ if y is None:
637
+ return preds, y, -100
638
+ else:
639
+ if self.using_one_hot_targets:
640
+ y_onecold = t.argmax(y, dim=-1)
641
+ ignore_index_val = 0
642
+ elif self.loss_func == "OrdinalRegLoss":
643
+ y_onecold = (y * self.num_classes).round().to(t.long)
644
+
645
+ y_onecold = y * (self.ord_reg_loss_max - self.ord_reg_loss_min)
646
+ y_onecold = (y_onecold + self.ord_reg_loss_min).round().to(t.long)
647
+ ignore_index_val = t.min(y_onecold).to(t.long)
648
+ else:
649
+ y_onecold = y
650
+ ignore_index_val = -100
651
+
652
+ if len(preds.shape) > len(y_onecold.shape):
653
+ preds = preds.squeeze()
654
+ return preds, y_onecold, ignore_index_val
655
+
656
def validation_step(self, batch, batch_idx):
    """Validation: log accuracy (as a percentage) and the validation loss;
    return the loss.

    Source was diff-mangled; reconstructed as runnable Python.
    """
    model_out, targets = self.model_step(batch, batch_idx)
    preds, y_onecold, ignore_index_val = self._get_preds_reals(model_out, targets)

    if self.loss_func == "OrdinalRegLoss":
        # torchmetrics' accuracy does not fit these regression-style labels,
        # so compute plain accuracy over the non-ignored positions by hand.
        flat_targets = y_onecold.flatten()
        keep = flat_targets != ignore_index_val
        flat_preds = preds.flatten()[keep]
        flat_targets = flat_targets[keep]
        acc = (flat_preds == flat_targets).sum() / len(flat_targets)
    else:
        acc = torchmetrics.functional.accuracy(
            preds,
            y_onecold.to(t.long),
            ignore_index=ignore_index_val,
            num_classes=self.num_classes,
            task="multiclass",
        )
    self.log("acc", acc * 100, prog_bar=True, sync_dist=True)

    loss = self._get_loss(model_out, targets, batch)
    self.log("val_loss", loss, prog_bar=True, sync_dist=True)

    return loss
678
+
679
def predict_step(self, batch, batch_idx):
    """Inference: return integer predictions alongside the decoded targets.

    Source was diff-mangled; reconstructed as runnable Python.
    """
    model_out, targets = self.model_step(batch, batch_idx)
    preds, y_onecold, _ignore_index_val = self._get_preds_reals(model_out, targets)
    return preds, y_onecold
683
+
684
def configure_optimizers(self):
    """Create the AdamW optimizer (with a reduced lr for the pretrained char
    CV backbone) and, depending on cfg, an LR scheduler.

    Returns either a Lightning optimizer dict (optimizer + scheduler config)
    or a bare [optimizer] list when no scheduler is configured.
    """
    params = list(self.named_parameters())

    def is_chars_conv(n):
        # Pretrained conv-backbone params get a reduced lr — but not the
        # freshly initialised classifier head on top of it.
        if "chars_conv" not in n:
            return False
        if "chars_conv" in n and "classifier" in n:
            return False
        else:
            return True

    grouped_parameters = [
        {
            "params": [p for n, p in params if is_chars_conv(n)],
            "lr": self.learning_rate / self.chars_conv_lr_reduction_factor,
            "weight_decay": self.weight_decay,
        },
        {
            "params": [p for n, p in params if not is_chars_conv(n)],
            "lr": self.learning_rate,
            "weight_decay": self.weight_decay,
        },
    ]
    opti = t.optim.AdamW(grouped_parameters, lr=self.learning_rate, weight_decay=self.weight_decay)
    if self.use_reduce_on_plateau:
        opti_dict = {
            "optimizer": opti,
            "lr_scheduler": {
                "scheduler": t.optim.lr_scheduler.ReduceLROnPlateau(opti, mode="min", patience=2, factor=0.5),
                "monitor": "val_loss",
                "frequency": 1,
                "interval": "epoch",
            },
        }
        return opti_dict
    else:
        cfg = self.hparams["cfg"]
        if cfg["use_reduce_on_plateau"]:
            # Plateau scheduling is handled via the instance attribute above;
            # with only the cfg flag set, no extra scheduler is attached.
            scheduler = None
        elif cfg["lr_scheduling"] == "multistep":
            scheduler = t.optim.lr_scheduler.MultiStepLR(
                opti, milestones=cfg["multistep_milestones"], gamma=cfg["gamma_multistep"], verbose=False
            )
            interval = "step" if cfg["use_training_steps_for_end_and_lr_decay"] else "epoch"
        elif cfg["lr_scheduling"] == "StepLR":
            scheduler = t.optim.lr_scheduler.StepLR(
                opti, step_size=cfg["gamma_step_size"], gamma=cfg["gamma_step_factor"]
            )
            interval = "step" if cfg["use_training_steps_for_end_and_lr_decay"] else "epoch"
        elif cfg["lr_scheduling"] == "anneal":
            # T_max=250 is hard-coded; presumably tuned to the training length.
            scheduler = t.optim.lr_scheduler.CosineAnnealingLR(
                opti, 250, eta_min=cfg["min_lr_anneal"], last_epoch=-1, verbose=False
            )
            interval = "step"
        elif cfg["lr_scheduling"] == "ExponentialLR":
            scheduler = t.optim.lr_scheduler.ExponentialLR(opti, gamma=cfg["lr_sched_exp_fac"])
            interval = "step"
        else:
            scheduler = None
        if scheduler is None:
            return [opti]
        else:
            opti_dict = {
                "optimizer": opti,
                "lr_scheduler": {
                    "scheduler": scheduler,
                    "monitor": "global_step",
                    "frequency": 1,
                    "interval": interval,
                },
            }
            return opti_dict
756
+
757
def on_fit_start(self) -> None:
    """Lightning hook: start the torch profiler when profiling is enabled."""
    if self.profile_torch_run:
        self.profilerr.start()
    return super().on_fit_start()
761
+
762
def on_fit_end(self) -> None:
    """Lightning hook: stop the torch profiler when profiling was enabled."""
    if self.profile_torch_run:
        self.profilerr.stop()
    return super().on_fit_end()
766
+
767
+
768
def prep_model_input(self, batch):
    """Unpack a dataloader batch and build the embedded model input.

    Depending on cfg, merges the fixation-sequence embedding with a
    character-position embedding produced by the "dense", "bert" or "resnet"
    char sub-model. Returns (x_embedded, attention_mask).
    Defined at module level; ``self`` is the LightningModule instance.
    """
    if len(batch) == 1:
        # Some dataloaders wrap the batch tuple in a 1-element container.
        batch = batch[0]
    if self.use_char_embed_info:
        if len(batch) == 5:
            x, chars_coords, ims, attention_mask, _ = batch
        elif batch[1].ndim == 4:
            # A 4-d second element is the rendered char image stack.
            x, ims, attention_mask, _ = batch
        else:
            x, chars_coords, attention_mask, _ = batch
            padding_list = None  # NOTE(review): assigned but never used — confirm.
    else:
        if len(batch) > 3:
            x = batch[0]
            y = batch[-1]
            attention_mask = batch[1]
        else:
            x, attention_mask, y = batch

    if self.model_to_use != "cv_only_model" and not self.hparams.cfg["only_use_2nd_input_stream"]:
        x_embedded = self.project(x)
    else:
        # CV-only / 2nd-stream-only models consume the raw input.
        x_embedded = x
    if self.use_char_embed_info:
        if self.method_chars_into_model == "dense":
            # NOTE(review): bool_mask is True at PADDING positions, yet it is
            # multiplied into the projection (keeping padded entries, zeroing
            # real ones) — looks inverted; confirm intended.
            bool_mask = chars_coords == self.input_padding_val
            bool_mask = bool_mask[:, :, 0]
            chars_coords_projected = self.chars_project_0(chars_coords).squeeze(-1)
            chars_coords_projected = chars_coords_projected * bool_mask
            if self.chars_project_1.in_features == chars_coords_projected.shape[-1]:
                chars_coords_projected = self.chars_project_1(chars_coords_projected)
            else:
                # Fallback when the char sequence length differs from the
                # projection's expected input width.
                chars_coords_projected = chars_coords_projected.mean(dim=-1)
                chars_coords_projected = chars_coords_projected.unsqueeze(1).repeat(1, x_embedded.shape[2])
        elif self.method_chars_into_model == "bert":
            # Mask is 1 for real chars; a leading 1 is prepended for the CLS slot.
            chars_mask = chars_coords != self.input_padding_val
            chars_mask = t.cat(
                (
                    t.ones(chars_mask[:, :1, 0].shape, dtype=t.long, device=chars_coords.device),
                    chars_mask[:, :, 0].to(t.long),
                ),
                dim=1,
            )
            chars_coords_projected = self.chars_project(chars_coords)

            position_ids = t.arange(
                0, chars_coords_projected.shape[1] + 1, dtype=t.long, device=chars_coords_projected.device
            )
            token_type_ids = t.zeros(
                (chars_coords_projected.size()[0], chars_coords_projected.size()[1] + 1),
                dtype=t.long,
                device=chars_coords_projected.device,
            )  # +1 for CLS
            chars_coords_projected = t.cat(
                (t.ones_like(chars_coords_projected[:, :1, :]), chars_coords_projected), dim=1
            )  # to add CLS token
            chars_coords_projected = self.chars_bert(
                position_ids=position_ids,
                inputs_embeds=chars_coords_projected,
                token_type_ids=token_type_ids,
                attention_mask=chars_mask,
            )
            # Keep only the CLS summary, whatever output type the model returns.
            if hasattr(chars_coords_projected, "last_hidden_state"):
                chars_coords_projected = chars_coords_projected.last_hidden_state[:, 0, :]
            elif hasattr(chars_coords_projected, "logits"):
                chars_coords_projected = chars_coords_projected.logits
            else:
                chars_coords_projected = chars_coords_projected.hidden_states[-1][:, 0, :]
        elif self.method_chars_into_model == "resnet":
            chars_conv_out = self.chars_conv(ims)
            if isinstance(chars_conv_out, list):
                chars_conv_out = chars_conv_out[0]
            if hasattr(chars_conv_out, "logits"):
                chars_conv_out = chars_conv_out.logits
            chars_coords_projected = self.chars_classifier(chars_conv_out)

        # Broadcast the per-sequence char summary across the fixation steps.
        # NOTE(review): nesting reconstructed from a diff rendering — this
        # tail may originally have been nested inside the "resnet" branch;
        # confirm against the repository history.
        chars_coords_projected = chars_coords_projected.unsqueeze(1).repeat(1, x_embedded.shape[1], 1)
        if hasattr(self, "chars_project_class_output"):
            chars_coords_projected = self.chars_project_class_output(chars_coords_projected)

        if self.hparams.cfg["only_use_2nd_input_stream"]:
            x_embedded = chars_coords_projected
        elif self.method_to_include_char_positions == "concat":
            x_embedded = t.cat((x_embedded, chars_coords_projected), dim=-1)
        else:
            x_embedded = x_embedded + chars_coords_projected
    return x_embedded, attention_mask
855
+
856
+
857
def forward(self, batch):
    """Full forward pass: embed the batch, run the sequence backbone, and
    return (activated) per-position class logits.

    Defined at module level; the class's ``forward`` method delegates here
    with ``self`` as the first argument.
    """
    prepped_input = prep_model_input(self, batch)

    # NOTE(review): prep_model_input returns a 2-tuple
    # (x_embedded, attention_mask); the 4-way unpack in the first branch
    # looks unreachable/legacy — confirm.
    if len(batch) > 5:
        x_embedded, padding_list, attention_mask, attention_mask_for_prediction = prepped_input
    elif len(batch) > 2:
        x_embedded, attention_mask = prepped_input
    else:
        x_embedded = prepped_input[0]
        attention_mask = prepped_input[-1]

    position_ids = t.arange(0, x_embedded.shape[1], dtype=t.long, device=x_embedded.device)
    token_type_ids = t.zeros(x_embedded.size()[:-1], dtype=t.long, device=x_embedded.device)

    if self.layer_norm_after_in_projection:
        x_embedded = self.layer_norm_in(x_embedded)

    # Dispatch on backbone family: each expects different call signatures.
    if self.model_to_use == "LSTM":
        bert_out = self.bert_model(x_embedded)
    elif self.model_to_use in ["ProphetNet", "T5", "FunnelModel"]:
        bert_out = self.bert_model(inputs_embeds=x_embedded, attention_mask=attention_mask)
    elif self.model_to_use == "xBERT":
        bert_out = self.bert_model(x_embedded, mask=attention_mask.to(bool))
    elif self.model_to_use == "cv_only_model":
        bert_out = self.bert_model(x_embedded)
    else:
        bert_out = self.bert_model(
            position_ids=position_ids,
            inputs_embeds=x_embedded,
            token_type_ids=token_type_ids,
            attention_mask=attention_mask,
        )
    # Normalise the various HF / plain-tensor return types to logits.
    if hasattr(bert_out, "last_hidden_state"):
        last_hidden_state = bert_out.last_hidden_state
        out = self.linear(last_hidden_state)
    elif hasattr(bert_out, "logits"):
        out = bert_out.logits
    else:
        out = bert_out
    out = self.final_activation(out)
    return out
models/BERT_20240104-223349_loop_normalize_by_line_height_and_width_True_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00430.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c4ae65e81c722f3732563942ab40447a186869bebb1bbc8433a782805e73ac3
3
+ size 86691676
models/BERT_20240104-233803_loop_normalize_by_line_height_and_width_False_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00719.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7588696e4afc4c8ffb0ff361d9566b7b360c61a3bb6fd6fcb484942b6d2568b
3
+ size 86692053
models/BERT_20240107-152040_loop_restrict_sim_data_to_4000_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00515.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:815b5500a1ae0a04bb55ae58c3896f07981757a2e1a2adf2cbc8a346551d88df
3
+ size 86686270
models/BERT_20240108-000344_loop_normalize_by_line_height_and_width_False_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00706.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2e56e1e33da611622315995e0cdf4db5aad6a086420401ca3ee95393b8977ac
3
+ size 86692053
models/BERT_20240108-011230_loop_normalize_by_line_height_and_width_True_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00560.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f060242cf0bc494d2908e0e99e9d411c9a9b131443cff91bb245229dad2f783
3
+ size 86691676
models/BERT_20240109-090419_loop_normalize_by_line_height_and_width_False_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00518.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbf23ac7baa88a957e1782158bd7a32aedcfcb0527b203079191ac259ec146c5
3
+ size 86692053
models/BERT_20240122-183729_loop_normalize_by_line_height_and_width_True_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00523.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fb7c8238752af51b64a23291080bb30edf9e090defcb2ec4015ddc8d543a9de
3
+ size 86691740
models/BERT_20240122-194041_loop_normalize_by_line_height_and_width_False_dataset_folder_idx_evaluation_8_epoch=41-val_loss=0.00462.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54fedcc5bdeda01bfae26bafcb7542c766807f1af9da7731aaa7ed38e93743d8
3
+ size 86692117
models/BERT_fin_exp_20240104-223349.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ add_layer_norm_to_char_mlp: true
2
+ add_layer_norm_to_in_projection: false
3
+ add_line_overlap_feature: true
4
+ add_normalised_values_as_features: false
5
+ change_pooling_for_timm_head_to: AdaptiveAvgPool2d
6
+ char_dims: 0
7
+ char_plot_shape:
8
+ - 224
9
+ - 224
10
+ chars_bert_reduction_factor: 4
11
+ chars_conv_lr_reduction_factor: 1
12
+ chars_conv_pooling_out_dim: 1
13
+ convert_posix: false
14
+ convert_winpath: false
15
+ cv_char_modelname: coatnet_nano_rw_224
16
+ cv_modelname: null
17
+ early_stopping_patience: 15
18
+ gamma_multistep: null
19
+ gamma_step_factor: 0.5
20
+ gamma_step_size: 3000
21
+ head_multiplication_factor: 64
22
+ hidden_dim_bert: 512
23
+ hidden_dropout_prob: 0.0
24
+ im_partial_string: fixations_chars_channel_sep
25
+ input_padding_val: 10
26
+ last_activation: Identity
27
+ layer_norm_after_in_projection: true
28
+ linear_activation: GELU
29
+ load_best_checkpoint_at_end: false
30
+ loss_function: corn_loss
31
+ lr: 0.0004
32
+ lr_initial: '0.0004'
33
+ lr_sched_exp_fac: null
34
+ lr_scheduling: StepLR
35
+ manual_max_sequence_for_model: 500
36
+ max_len_chars_list: 0
37
+ max_seq_length: 500
38
+ method_chars_into_model: resnet
39
+ method_to_include_char_positions: concat
40
+ min_lr_anneal: 1e-6
41
+ model_to_use: BERT
42
+ multistep_milestones: null
43
+ n_layers_BERT: 4
44
+ norm_by_char_averages: false
45
+ norm_by_line_width: false
46
+ norm_coords_by_letter_min_x_y: false
47
+ normalize_by_line_height_and_width: true
48
+ num_attention_heads: 8
49
+ num_classes: 16
50
+ num_lin_layers: 1
51
+ num_warmup_steps: 3000
52
+ one_hot_y: false
53
+ ord_reg_loss_max: 16
54
+ ord_reg_loss_min: -1
55
+ padding_at_end: true
56
+ plot_histogram: true
57
+ plot_learning_curves: true
58
+ precision: 16-mixed
59
+ prediction_only: false
60
+ pretrained_model_name_to_load: null
61
+ profile_torch_run: false
62
+ reload_model: false
63
+ reload_model_date: null
64
+ remove_eval_idx_from_train_idx: true
65
+ remove_timm_classifier_head_pooling: true
66
+ sample_cols:
67
+ - x
68
+ - y
69
+ sample_means:
70
+ - 0.7326
71
+ - 6.6381
72
+ - 2.4717
73
+ sample_std:
74
+ - 0.2778
75
+ - 1.882
76
+ - 1.8562
77
+ sample_std_unscaled:
78
+ - 285.193
79
+ - 131.1842
80
+ - 1.8562
81
+ save_weights_only: true
82
+ set_max_seq_len_manually: true
83
+ set_num_classes_manually: true
84
+ source_for_pretrained_cv_model: timm
85
+ target_padding_number: -100
86
+ track_activations_via_hook: false
87
+ track_gradient_histogram: false
88
+ use_char_bounding_boxes: true
89
+ use_early_stopping: false
90
+ use_embedded_char_pos_info: true
91
+ use_fixation_duration_information: false
92
+ use_in_projection_bias: false
93
+ use_lr_warmup: true
94
+ use_pupil_size_information: false
95
+ use_reduce_on_plateau: false
96
+ use_start_time_as_input_col: false
97
+ use_training_steps_for_end_and_lr_decay: true
98
+ use_words_coords: false
99
+ warmup_exponent: 1
100
+ weight_decay: 0.0
models/BERT_fin_exp_20240104-233803.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ add_layer_norm_to_char_mlp: true
2
+ add_layer_norm_to_in_projection: false
3
+ add_line_overlap_feature: true
4
+ add_normalised_values_as_features: false
5
+ change_pooling_for_timm_head_to: AdaptiveAvgPool2d
6
+ char_dims: 0
7
+ char_plot_shape:
8
+ - 224
9
+ - 224
10
+ chars_bert_reduction_factor: 4
11
+ chars_conv_lr_reduction_factor: 1
12
+ chars_conv_pooling_out_dim: 1
13
+ convert_posix: false
14
+ convert_winpath: false
15
+ cv_char_modelname: coatnet_nano_rw_224
16
+ cv_modelname: null
17
+ early_stopping_patience: 15
18
+ gamma_multistep: null
19
+ gamma_step_factor: 0.5
20
+ gamma_step_size: 3000
21
+ head_multiplication_factor: 64
22
+ hidden_dim_bert: 512
23
+ hidden_dropout_prob: 0.0
24
+ im_partial_string: fixations_chars_channel_sep
25
+ input_padding_val: 10
26
+ last_activation: Identity
27
+ layer_norm_after_in_projection: true
28
+ linear_activation: GELU
29
+ load_best_checkpoint_at_end: false
30
+ loss_function: corn_loss
31
+ lr: 0.0004
32
+ lr_initial: '0.0004'
33
+ lr_sched_exp_fac: null
34
+ lr_scheduling: StepLR
35
+ manual_max_sequence_for_model: 500
36
+ max_len_chars_list: 0
37
+ max_seq_length: 500
38
+ method_chars_into_model: resnet
39
+ method_to_include_char_positions: concat
40
+ min_lr_anneal: 1e-6
41
+ model_to_use: BERT
42
+ multistep_milestones: null
43
+ n_layers_BERT: 4
44
+ norm_by_char_averages: false
45
+ norm_by_line_width: false
46
+ norm_coords_by_letter_min_x_y: false
47
+ normalize_by_line_height_and_width: false
48
+ num_attention_heads: 8
49
+ num_classes: 16
50
+ num_lin_layers: 1
51
+ num_warmup_steps: 3000
52
+ one_hot_y: false
53
+ ord_reg_loss_max: 16
54
+ ord_reg_loss_min: -1
55
+ padding_at_end: true
56
+ plot_histogram: true
57
+ plot_learning_curves: true
58
+ precision: 16-mixed
59
+ prediction_only: false
60
+ pretrained_model_name_to_load: null
61
+ profile_torch_run: false
62
+ reload_model: false
63
+ reload_model_date: null
64
+ remove_eval_idx_from_train_idx: true
65
+ remove_timm_classifier_head_pooling: true
66
+ sample_cols:
67
+ - x
68
+ - y
69
+ sample_means:
70
+ - 710.6114
71
+ - 473.7518
72
+ - 2.4717
73
+ sample_std:
74
+ - 285.1937
75
+ - 131.1842
76
+ - 1.8562
77
+ sample_std_unscaled:
78
+ - 285.193
79
+ - 131.1842
80
+ - 1.8562
81
+ save_weights_only: true
82
+ set_max_seq_len_manually: true
83
+ set_num_classes_manually: true
84
+ source_for_pretrained_cv_model: timm
85
+ target_padding_number: -100
86
+ track_activations_via_hook: false
87
+ track_gradient_histogram: false
88
+ use_char_bounding_boxes: true
89
+ use_early_stopping: false
90
+ use_embedded_char_pos_info: true
91
+ use_fixation_duration_information: false
92
+ use_in_projection_bias: false
93
+ use_lr_warmup: true
94
+ use_pupil_size_information: false
95
+ use_reduce_on_plateau: false
96
+ use_start_time_as_input_col: false
97
+ use_training_steps_for_end_and_lr_decay: true
98
+ use_words_coords: false
99
+ warmup_exponent: 1
100
+ weight_decay: 0.0
models/BERT_fin_exp_20240107-152040.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ add_layer_norm_to_char_mlp: true
2
+ add_layer_norm_to_in_projection: false
3
+ add_line_overlap_feature: true
4
+ add_normalised_values_as_features: false
5
+ change_pooling_for_timm_head_to: AdaptiveAvgPool2d
6
+ char_dims: 0
7
+ char_plot_shape:
8
+ - 224
9
+ - 224
10
+ chars_bert_reduction_factor: 4
11
+ chars_conv_lr_reduction_factor: 1
12
+ chars_conv_pooling_out_dim: 1
13
+ convert_posix: false
14
+ convert_winpath: false
15
+ cv_char_modelname: coatnet_nano_rw_224
16
+ cv_modelname: null
17
+ early_stopping_patience: 15
18
+ gamma_multistep: null
19
+ gamma_step_factor: 0.5
20
+ gamma_step_size: 3000
21
+ head_multiplication_factor: 64
22
+ hidden_dim_bert: 512
23
+ hidden_dropout_prob: 0.0
24
+ im_partial_string: fixations_chars_channel_sep
25
+ input_padding_val: 10
26
+ last_activation: Identity
27
+ layer_norm_after_in_projection: true
28
+ linear_activation: GELU
29
+ load_best_checkpoint_at_end: false
30
+ loss_function: corn_loss
31
+ lr: 0.0004
32
+ lr_initial: '0.0004'
33
+ lr_sched_exp_fac: null
34
+ lr_scheduling: StepLR
35
+ manual_max_sequence_for_model: 500
36
+ max_len_chars_list: 0
37
+ max_seq_length: 500
38
+ method_chars_into_model: resnet
39
+ method_to_include_char_positions: concat
40
+ min_lr_anneal: 1e-6
41
+ model_to_use: BERT
42
+ multistep_milestones: null
43
+ n_layers_BERT: 4
44
+ norm_by_char_averages: false
45
+ norm_by_line_width: false
46
+ norm_coords_by_letter_min_x_y: true
47
+ normalize_by_line_height_and_width: true
48
+ num_attention_heads: 8
49
+ num_classes: 16
50
+ num_lin_layers: 1
51
+ num_warmup_steps: 3000
52
+ one_hot_y: false
53
+ ord_reg_loss_max: 16
54
+ ord_reg_loss_min: -1
55
+ padding_at_end: true
56
+ plot_histogram: true
57
+ plot_learning_curves: true
58
+ precision: 16-mixed
59
+ prediction_only: false
60
+ pretrained_model_name_to_load: null
61
+ profile_torch_run: false
62
+ reload_model: false
63
+ reload_model_date: null
64
+ remove_eval_idx_from_train_idx: true
65
+ remove_timm_classifier_head_pooling: true
66
+ sample_cols:
67
+ - x
68
+ - y
69
+ sample_means:
70
+ - 0.4423
71
+ - 3.1164
72
+ - 2.4717
73
+ sample_std:
74
+ - 0.2778
75
+ - 1.882
76
+ - 1.8562
77
+ sample_std_unscaled:
78
+ - 285.193
79
+ - 131.1842
80
+ - 1.8562
81
+ save_weights_only: true
82
+ set_max_seq_len_manually: true
83
+ set_num_classes_manually: true
84
+ source_for_pretrained_cv_model: timm
85
+ target_padding_number: -100
86
+ track_activations_via_hook: false
87
+ track_gradient_histogram: false
88
+ use_char_bounding_boxes: true
89
+ use_early_stopping: false
90
+ use_embedded_char_pos_info: true
91
+ use_fixation_duration_information: false
92
+ use_in_projection_bias: false
93
+ use_lr_warmup: true
94
+ use_pupil_size_information: false
95
+ use_reduce_on_plateau: false
96
+ use_start_time_as_input_col: false
97
+ use_training_steps_for_end_and_lr_decay: true
98
+ use_words_coords: false
99
+ warmup_exponent: 1
100
+ weight_decay: 0.0
models/BERT_fin_exp_20240108-000344.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ add_layer_norm_to_char_mlp: true
2
+ add_layer_norm_to_in_projection: false
3
+ add_line_overlap_feature: true
4
+ add_normalised_values_as_features: false
5
+ change_pooling_for_timm_head_to: AdaptiveAvgPool2d
6
+ char_dims: 0
7
+ char_plot_shape:
8
+ - 224
9
+ - 224
10
+ chars_bert_reduction_factor: 4
11
+ chars_conv_lr_reduction_factor: 1
12
+ chars_conv_pooling_out_dim: 1
13
+ convert_posix: false
14
+ convert_winpath: true
15
+ cv_char_modelname: coatnet_nano_rw_224
16
+ cv_modelname: null
17
+ early_stopping_patience: 15
18
+ gamma_multistep: null
19
+ gamma_step_factor: 0.5
20
+ gamma_step_size: 3000
21
+ head_multiplication_factor: 64
22
+ hidden_dim_bert: 512
23
+ hidden_dropout_prob: 0.0
24
+ im_partial_string: fixations_chars_channel_sep
25
+ input_padding_val: 10
26
+ last_activation: Identity
27
+ layer_norm_after_in_projection: true
28
+ linear_activation: GELU
29
+ load_best_checkpoint_at_end: false
30
+ loss_function: corn_loss
31
+ lr: 0.0004
32
+ lr_initial: '0.0004'
33
+ lr_sched_exp_fac: null
34
+ lr_scheduling: StepLR
35
+ manual_max_sequence_for_model: 500
36
+ max_len_chars_list: 0
37
+ max_seq_length: 500
38
+ method_chars_into_model: resnet
39
+ method_to_include_char_positions: concat
40
+ min_lr_anneal: 1e-6
41
+ model_to_use: BERT
42
+ multistep_milestones: null
43
+ n_layers_BERT: 4
44
+ norm_by_char_averages: false
45
+ norm_by_line_width: false
46
+ norm_coords_by_letter_min_x_y: true
47
+ normalize_by_line_height_and_width: false
48
+ num_attention_heads: 8
49
+ num_classes: 16
50
+ num_lin_layers: 1
51
+ num_warmup_steps: 3000
52
+ one_hot_y: false
53
+ ord_reg_loss_max: 16
54
+ ord_reg_loss_min: -1
55
+ padding_at_end: true
56
+ plot_histogram: true
57
+ plot_learning_curves: true
58
+ precision: 16-mixed
59
+ prediction_only: false
60
+ pretrained_model_name_to_load: null
61
+ profile_torch_run: false
62
+ reload_model: false
63
+ reload_model_date: null
64
+ remove_eval_idx_from_train_idx: true
65
+ remove_timm_classifier_head_pooling: true
66
+ sample_cols:
67
+ - x
68
+ - y
69
+ sample_means:
70
+ - 455.5905
71
+ - 218.0598
72
+ - 2.4717
73
+ sample_std:
74
+ - 285.1936
75
+ - 131.1842
76
+ - 1.8562
77
+ sample_std_unscaled:
78
+ - 285.1939
79
+ - 131.1844
80
+ - 1.8562
81
+ save_weights_only: true
82
+ set_max_seq_len_manually: true
83
+ set_num_classes_manually: true
84
+ source_for_pretrained_cv_model: timm
85
+ target_padding_number: -100
86
+ track_activations_via_hook: false
87
+ track_gradient_histogram: false
88
+ use_char_bounding_boxes: true
89
+ use_early_stopping: false
90
+ use_embedded_char_pos_info: true
91
+ use_fixation_duration_information: false
92
+ use_in_projection_bias: false
93
+ use_lr_warmup: true
94
+ use_pupil_size_information: false
95
+ use_reduce_on_plateau: false
96
+ use_start_time_as_input_col: false
97
+ use_training_steps_for_end_and_lr_decay: true
98
+ use_words_coords: false
99
+ warmup_exponent: 1
100
+ weight_decay: 0.0
models/BERT_fin_exp_20240108-011230.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ add_layer_norm_to_char_mlp: true
2
+ add_layer_norm_to_in_projection: false
3
+ add_line_overlap_feature: true
4
+ add_normalised_values_as_features: false
5
+ change_pooling_for_timm_head_to: AdaptiveAvgPool2d
6
+ char_dims: 0
7
+ char_plot_shape:
8
+ - 224
9
+ - 224
10
+ chars_bert_reduction_factor: 4
11
+ chars_conv_lr_reduction_factor: 1
12
+ chars_conv_pooling_out_dim: 1
13
+ convert_posix: false
14
+ convert_winpath: true
15
+ cv_char_modelname: coatnet_nano_rw_224
16
+ cv_modelname: null
17
+ early_stopping_patience: 15
18
+ gamma_multistep: null
19
+ gamma_step_factor: 0.5
20
+ gamma_step_size: 3000
21
+ head_multiplication_factor: 64
22
+ hidden_dim_bert: 512
23
+ hidden_dropout_prob: 0.0
24
+ im_partial_string: fixations_chars_channel_sep
25
+ input_padding_val: 10
26
+ last_activation: Identity
27
+ layer_norm_after_in_projection: true
28
+ linear_activation: GELU
29
+ load_best_checkpoint_at_end: false
30
+ loss_function: corn_loss
31
+ lr: 0.0004
32
+ lr_initial: '0.0004'
33
+ lr_sched_exp_fac: null
34
+ lr_scheduling: StepLR
35
+ manual_max_sequence_for_model: 500
36
+ max_len_chars_list: 0
37
+ max_seq_length: 500
38
+ method_chars_into_model: resnet
39
+ method_to_include_char_positions: concat
40
+ min_lr_anneal: 1e-6
41
+ model_to_use: BERT
42
+ multistep_milestones: null
43
+ n_layers_BERT: 4
44
+ norm_by_char_averages: false
45
+ norm_by_line_width: false
46
+ norm_coords_by_letter_min_x_y: true
47
+ normalize_by_line_height_and_width: true
48
+ num_attention_heads: 8
49
+ num_classes: 16
50
+ num_lin_layers: 1
51
+ num_warmup_steps: 3000
52
+ one_hot_y: false
53
+ ord_reg_loss_max: 16
54
+ ord_reg_loss_min: -1
55
+ padding_at_end: true
56
+ plot_histogram: true
57
+ plot_learning_curves: true
58
+ precision: 16-mixed
59
+ prediction_only: false
60
+ pretrained_model_name_to_load: null
61
+ profile_torch_run: false
62
+ reload_model: false
63
+ reload_model_date: null
64
+ remove_eval_idx_from_train_idx: true
65
+ remove_timm_classifier_head_pooling: true
66
+ sample_cols:
67
+ - x
68
+ - y
69
+ sample_means:
70
+ - 0.4423
71
+ - 3.1164
72
+ - 2.4717
73
+ sample_std:
74
+ - 0.2778
75
+ - 1.882
76
+ - 1.8562
77
+ sample_std_unscaled:
78
+ - 285.1939
79
+ - 131.1844
80
+ - 1.8562
81
+ save_weights_only: true
82
+ set_max_seq_len_manually: true
83
+ set_num_classes_manually: true
84
+ source_for_pretrained_cv_model: timm
85
+ target_padding_number: -100
86
+ track_activations_via_hook: false
87
+ track_gradient_histogram: false
88
+ use_char_bounding_boxes: true
89
+ use_early_stopping: false
90
+ use_embedded_char_pos_info: true
91
+ use_fixation_duration_information: false
92
+ use_in_projection_bias: false
93
+ use_lr_warmup: true
94
+ use_pupil_size_information: false
95
+ use_reduce_on_plateau: false
96
+ use_start_time_as_input_col: false
97
+ use_training_steps_for_end_and_lr_decay: true
98
+ use_words_coords: false
99
+ warmup_exponent: 1
100
+ weight_decay: 0.0
models/BERT_fin_exp_20240109-090419.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ add_layer_norm_to_char_mlp: true
2
+ add_layer_norm_to_in_projection: false
3
+ add_line_overlap_feature: true
4
+ add_normalised_values_as_features: false
5
+ change_pooling_for_timm_head_to: AdaptiveAvgPool2d
6
+ char_dims: 0
7
+ char_plot_shape:
8
+ - 224
9
+ - 224
10
+ chars_bert_reduction_factor: 4
11
+ chars_conv_lr_reduction_factor: 1
12
+ chars_conv_pooling_out_dim: 1
13
+ convert_posix: false
14
+ convert_winpath: true
15
+ cv_char_modelname: coatnet_nano_rw_224
16
+ cv_modelname: null
17
+ early_stopping_patience: 15
18
+ gamma_multistep: null
19
+ gamma_step_factor: 0.5
20
+ gamma_step_size: 3000
21
+ head_multiplication_factor: 64
22
+ hidden_dim_bert: 512
23
+ hidden_dropout_prob: 0.0
24
+ im_partial_string: fixations_chars_channel_sep
25
+ input_padding_val: 10
26
+ last_activation: Identity
27
+ layer_norm_after_in_projection: true
28
+ linear_activation: GELU
29
+ load_best_checkpoint_at_end: false
30
+ loss_function: corn_loss
31
+ lr: 0.0004
32
+ lr_initial: '0.0004'
33
+ lr_sched_exp_fac: null
34
+ lr_scheduling: StepLR
35
+ manual_max_sequence_for_model: 500
36
+ max_len_chars_list: 0
37
+ max_seq_length: 500
38
+ method_chars_into_model: resnet
39
+ method_to_include_char_positions: concat
40
+ min_lr_anneal: 1e-6
41
+ model_to_use: BERT
42
+ multistep_milestones: null
43
+ n_layers_BERT: 4
44
+ norm_by_char_averages: false
45
+ norm_by_line_width: false
46
+ norm_coords_by_letter_min_x_y: true
47
+ normalize_by_line_height_and_width: false
48
+ num_attention_heads: 8
49
+ num_classes: 16
50
+ num_lin_layers: 1
51
+ num_warmup_steps: 3000
52
+ one_hot_y: false
53
+ ord_reg_loss_max: 16
54
+ ord_reg_loss_min: -1
55
+ padding_at_end: true
56
+ plot_histogram: true
57
+ plot_learning_curves: true
58
+ precision: 16-mixed
59
+ prediction_only: false
60
+ pretrained_model_name_to_load: null
61
+ profile_torch_run: false
62
+ reload_model: false
63
+ reload_model_date: null
64
+ remove_eval_idx_from_train_idx: true
65
+ remove_timm_classifier_head_pooling: true
66
+ sample_cols:
67
+ - x
68
+ - y
69
+ sample_means:
70
+ - 455.708
71
+ - 217.8342
72
+ - 2.4706
73
+ sample_std:
74
+ - 285.2534
75
+ - 131.0263
76
+ - 1.8542
77
+ sample_std_unscaled:
78
+ - 285.2527
79
+ - 131.0262
80
+ - 1.8543
81
+ save_weights_only: true
82
+ set_max_seq_len_manually: true
83
+ set_num_classes_manually: true
84
+ source_for_pretrained_cv_model: timm
85
+ target_padding_number: -100
86
+ track_activations_via_hook: false
87
+ track_gradient_histogram: false
88
+ use_char_bounding_boxes: true
89
+ use_early_stopping: false
90
+ use_embedded_char_pos_info: true
91
+ use_fixation_duration_information: false
92
+ use_in_projection_bias: false
93
+ use_lr_warmup: true
94
+ use_pupil_size_information: false
95
+ use_reduce_on_plateau: false
96
+ use_start_time_as_input_col: false
97
+ use_training_steps_for_end_and_lr_decay: true
98
+ use_words_coords: false
99
+ warmup_exponent: 1
100
+ weight_decay: 0.0
models/BERT_fin_exp_20240122-183729.yaml ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ add_layer_norm_to_char_mlp: true
2
+ add_layer_norm_to_in_projection: false
3
+ add_line_overlap_feature: true
4
+ add_normalised_values_as_features: false
5
+ add_woc_feature: false
6
+ change_pooling_for_timm_head_to: AdaptiveAvgPool2d
7
+ char_dims: 0
8
+ char_plot_shape:
9
+ - 224
10
+ - 224
11
+ chars_bert_reduction_factor: 4
12
+ chars_conv_lr_reduction_factor: 1
13
+ chars_conv_pooling_out_dim: 1
14
+ convert_posix: false
15
+ convert_winpath: false
16
+ cv_char_modelname: coatnet_nano_rw_224
17
+ cv_modelname: null
18
+ early_stopping_patience: 15
19
+ gamma_multistep: null
20
+ gamma_step_factor: 0.5
21
+ gamma_step_size: 3000
22
+ head_multiplication_factor: 64
23
+ hidden_dim_bert: 512
24
+ hidden_dropout_prob: 0.0
25
+ im_partial_string: fixations_chars_channel_sep
26
+ input_padding_val: 10
27
+ last_activation: Identity
28
+ layer_norm_after_in_projection: true
29
+ linear_activation: GELU
30
+ load_best_checkpoint_at_end: false
31
+ loss_function: corn_loss
32
+ lr: 0.0004
33
+ lr_initial: '0.0004'
34
+ lr_sched_exp_fac: null
35
+ lr_scheduling: StepLR
36
+ manual_max_sequence_for_model: 500
37
+ max_len_chars_list: 0
38
+ max_seq_length: 500
39
+ method_chars_into_model: resnet
40
+ method_to_include_char_positions: concat
41
+ min_lr_anneal: 1e-6
42
+ model_to_use: BERT
43
+ multistep_milestones: null
44
+ n_layers_BERT: 4
45
+ norm_by_char_averages: false
46
+ norm_by_line_width: false
47
+ norm_coords_by_letter_min_x_y: true
48
+ normalize_by_line_height_and_width: true
49
+ num_attention_heads: 8
50
+ num_classes: 16
51
+ num_lin_layers: 1
52
+ num_warmup_steps: 3000
53
+ one_hot_y: false
54
+ only_use_2nd_input_stream: false
55
+ ord_reg_loss_max: 16
56
+ ord_reg_loss_min: -1
57
+ padding_at_end: true
58
+ plot_histogram: true
59
+ plot_learning_curves: true
60
+ precision: 16-mixed
61
+ prediction_only: false
62
+ pretrained_model_name_to_load: null
63
+ profile_torch_run: false
64
+ reload_model: false
65
+ reload_model_date: null
66
+ remove_eval_idx_from_train_idx: true
67
+ remove_timm_classifier_head_pooling: true
68
+ sample_cols:
69
+ - x
70
+ - y
71
+ sample_means:
72
+ - 0.4433
73
+ - 2.9599
74
+ - 2.3264
75
+ sample_std:
76
+ - 0.2782
77
+ - 1.7872
78
+ - 1.7619
79
+ sample_std_unscaled:
80
+ - 287.0107
81
+ - 124.4113
82
+ - 1.7619
83
+ save_weights_only: true
84
+ set_max_seq_len_manually: true
85
+ set_num_classes_manually: true
86
+ source_for_pretrained_cv_model: timm
87
+ target_padding_number: -100
88
+ track_activations_via_hook: false
89
+ track_gradient_histogram: false
90
+ use_char_bounding_boxes: true
91
+ use_early_stopping: false
92
+ use_embedded_char_pos_info: true
93
+ use_fixation_duration_information: false
94
+ use_in_projection_bias: false
95
+ use_lr_warmup: true
96
+ use_pupil_size_information: false
97
+ use_reduce_on_plateau: false
98
+ use_start_time_as_input_col: false
99
+ use_training_steps_for_end_and_lr_decay: true
100
+ use_words_coords: false
101
+ warmup_exponent: 1
102
+ weight_decay: 0.0
models/BERT_fin_exp_20240122-194041.yaml ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ add_layer_norm_to_char_mlp: true
2
+ add_layer_norm_to_in_projection: false
3
+ add_line_overlap_feature: true
4
+ add_normalised_values_as_features: false
5
+ add_woc_feature: false
6
+ change_pooling_for_timm_head_to: AdaptiveAvgPool2d
7
+ char_dims: 0
8
+ char_plot_shape:
9
+ - 224
10
+ - 224
11
+ chars_bert_reduction_factor: 4
12
+ chars_conv_lr_reduction_factor: 1
13
+ chars_conv_pooling_out_dim: 1
14
+ convert_posix: false
15
+ convert_winpath: false
16
+ cv_char_modelname: coatnet_nano_rw_224
17
+ cv_modelname: null
18
+ early_stopping_patience: 15
19
+ gamma_multistep: null
20
+ gamma_step_factor: 0.5
21
+ gamma_step_size: 3000
22
+ head_multiplication_factor: 64
23
+ hidden_dim_bert: 512
24
+ hidden_dropout_prob: 0.0
25
+ im_partial_string: fixations_chars_channel_sep
26
+ input_padding_val: 10
27
+ last_activation: Identity
28
+ layer_norm_after_in_projection: true
29
+ linear_activation: GELU
30
+ load_best_checkpoint_at_end: false
31
+ loss_function: corn_loss
32
+ lr: 0.0004
33
+ lr_initial: '0.0004'
34
+ lr_sched_exp_fac: null
35
+ lr_scheduling: StepLR
36
+ manual_max_sequence_for_model: 500
37
+ max_len_chars_list: 0
38
+ max_seq_length: 500
39
+ method_chars_into_model: resnet
40
+ method_to_include_char_positions: concat
41
+ min_lr_anneal: 1e-6
42
+ model_to_use: BERT
43
+ multistep_milestones: null
44
+ n_layers_BERT: 4
45
+ norm_by_char_averages: false
46
+ norm_by_line_width: false
47
+ norm_coords_by_letter_min_x_y: true
48
+ normalize_by_line_height_and_width: false
49
+ num_attention_heads: 8
50
+ num_classes: 16
51
+ num_lin_layers: 1
52
+ num_warmup_steps: 3000
53
+ one_hot_y: false
54
+ only_use_2nd_input_stream: false
55
+ ord_reg_loss_max: 16
56
+ ord_reg_loss_min: -1
57
+ padding_at_end: true
58
+ plot_histogram: true
59
+ plot_learning_curves: true
60
+ precision: 16-mixed
61
+ prediction_only: false
62
+ pretrained_model_name_to_load: null
63
+ profile_torch_run: false
64
+ reload_model: false
65
+ reload_model_date: null
66
+ remove_eval_idx_from_train_idx: true
67
+ remove_timm_classifier_head_pooling: true
68
+ sample_cols:
69
+ - x
70
+ - y
71
+ sample_means:
72
+ - 459.3367
73
+ - 206.88
74
+ - 2.3264
75
+ sample_std:
76
+ - 287.0111
77
+ - 124.4113
78
+ - 1.7619
79
+ sample_std_unscaled:
80
+ - 287.0107
81
+ - 124.4113
82
+ - 1.7619
83
+ save_weights_only: true
84
+ set_max_seq_len_manually: true
85
+ set_num_classes_manually: true
86
+ source_for_pretrained_cv_model: timm
87
+ target_padding_number: -100
88
+ track_activations_via_hook: false
89
+ track_gradient_histogram: false
90
+ use_char_bounding_boxes: true
91
+ use_early_stopping: false
92
+ use_embedded_char_pos_info: true
93
+ use_fixation_duration_information: false
94
+ use_in_projection_bias: false
95
+ use_lr_warmup: true
96
+ use_pupil_size_information: false
97
+ use_reduce_on_plateau: false
98
+ use_start_time_as_input_col: false
99
+ use_training_steps_for_end_and_lr_decay: true
100
+ use_words_coords: false
101
+ warmup_exponent: 1
102
+ weight_decay: 0.0
requirements.txt ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets
2
+ einops
3
+ matplotlib
4
+ numpy
5
+ pandas
6
+ PyYAML
7
+ seaborn
8
+ tqdm
9
+ transformers==4.30.2
10
+ tensorboard
11
+ torchmetrics
12
+ pytorch-lightning
13
+ scikit-learn
14
+ plotly
15
+ lovely-tensors
16
+ timm
17
+ openpyxl
18
+ torch==2.*
19
+ pydantic==1.10
20
+ streamlit
21
+ pycairo
22
+ eyekit
23
+ stqdm
24
+ jellyfish
25
+ icecream
run_in_notebook.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
utils.py ADDED
@@ -0,0 +1,2016 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import zipfile
2
+ import os
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ from torch.utils.data.dataloader import DataLoader as dl
6
+ import yaml
7
+ from io import StringIO
8
+ import torch as t
9
+ import numpy as np
10
+ import pandas as pd
11
+ from torch.utils.data import Dataset as torch_dset
12
+ from PIL import Image
13
+ import torchvision.transforms.functional as tvfunc
14
+ import json
15
+ from matplotlib import pyplot as plt
16
+ import matplotlib.patches as patches
17
+ from matplotlib.font_manager import FontProperties
18
+ import pathlib as pl
19
+ import matplotlib as mpl
20
+ import streamlit as st
21
+ from streamlit.runtime.uploaded_file_manager import UploadedFile
22
+ import einops as eo
23
+ import copy
24
+
25
+ # import stqdm
26
+ from tqdm.auto import tqdm
27
+ import time
28
+ import requests
29
+
30
+ from matplotlib.patches import Rectangle
31
+ from matplotlib import font_manager
32
+ from models import LitModel, EnsembleModel
33
+ from loss_functions import corn_label_from_logits
34
+ import classic_correction_algos as calgo
35
+ import analysis_funcs as anf
36
+
37
+ TEMP_FOLDER = pl.Path("results")
38
+ AVAILABLE_FONTS = [x.name for x in font_manager.fontManager.ttflist]
39
+ PLOTS_FOLDER = pl.Path("plots")
40
+ TEMP_FIGURE_STIMULUS_PATH = PLOTS_FOLDER / "temp_matplotlib_plot_stimulus.png"
41
+ all_fonts = [x.name for x in font_manager.fontManager.ttflist]
42
+ mpl.use("agg")
43
+
44
+ DIST_MODELS_FOLDER = pl.Path("models")
45
+ IMAGENET_MEAN = [0.485, 0.456, 0.406]
46
+ IMAGENET_STD = [0.229, 0.224, 0.225]
47
+ gradio_plots = pl.Path("plots")
48
+
49
+ event_strs = [
50
+ "EFIX",
51
+ "EFIX R",
52
+ "EFIX L",
53
+ "SSACC",
54
+ "ESACC",
55
+ "SFIX",
56
+ "MSG",
57
+ "SBLINK",
58
+ "EBLINK",
59
+ "BUTTON",
60
+ "INPUT",
61
+ "END",
62
+ "START",
63
+ "DISPLAY ON",
64
+ ]
65
+ names_dict = {
66
+ "SSACC": {"Descr": "Start of Saccade", "Pattern": "SSACC <eye > <stime>"},
67
+ "ESACC": {
68
+ "Descr": "End of Saccade",
69
+ "Pattern": "ESACC <eye > <stime> <etime > <dur> <sxp > <syp> <exp > <eyp> <ampl > <pv >",
70
+ },
71
+ "SFIX": {"Descr": "Start of Fixation", "Pattern": "SFIX <eye > <stime>"},
72
+ "EFIX": {"Descr": "End of Fixation", "Pattern": "EFIX <eye > <stime> <etime > <dur> <axp > <ayp> <aps >"},
73
+ "SBLINK": {"Descr": "Start of Blink", "Pattern": "SBLINK <eye > <stime>"},
74
+ "EBLINK": {"Descr": "End of Blink", "Pattern": "EBLINK <eye > <stime> <etime > <dur>"},
75
+ "DISPLAY ON": {"Descr": "Actual start of Trial", "Pattern": "DISPLAY ON"},
76
+ }
77
+ metadata_strs = ["DISPLAY COORDS", "GAZE_COORDS", "FRAMERATE"]
78
+
79
+ ALGO_CHOICES = st.session_state["ALGO_CHOICES"] = [
80
+ "warp",
81
+ "regress",
82
+ "compare",
83
+ "attach",
84
+ "segment",
85
+ "split",
86
+ "stretch",
87
+ "chain",
88
+ "slice",
89
+ "cluster",
90
+ "merge",
91
+ "Wisdom_of_Crowds",
92
+ "DIST",
93
+ "DIST-Ensemble",
94
+ "Wisdom_of_Crowds_with_DIST",
95
+ "Wisdom_of_Crowds_with_DIST_Ensemble",
96
+ ]
97
+ COLORS = px.colors.qualitative.Alphabet
98
+
99
+
100
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that can serialise numpy arrays, Paths and Streamlit uploads.

    Adapted from https://stackoverflow.com/questions/26646362/numpy-array-is-not-json-serializable
    """

    def default(self, obj):
        # Numpy arrays are emitted as plain (possibly nested) Python lists.
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # Paths and uploaded-file handles are stored via their string form.
        # NOTE: the `or` chain is deliberate so UploadedFile is only resolved
        # when the Path check fails.
        if isinstance(obj, pl.Path) or isinstance(obj, UploadedFile):
            return str(obj)
        # Anything else falls through to the stock encoder (raises TypeError).
        return super().default(obj)
109
+
110
+
111
class DSet(torch_dset):
    """Dataset of fixation sequences with optional char coordinates, stimulus
    images and attention masks.

    Each item is a tuple assembled in a fixed order:
    (in_sequence, [chars_center_coords], [image], [attention_mask], out_categories)
    where the bracketed entries are present only when the corresponding
    constructor argument enables them.
    """

    def __init__(
        self,
        in_sequence: t.Tensor,
        chars_center_coords_padded: t.Tensor,
        out_categories: t.Tensor,
        trialslist: list,
        padding_list: list = None,
        padding_at_end: bool = False,
        return_images_for_conv: bool = False,
        im_partial_string: str = "fixations_chars_channel_sep",
        input_im_shape=[224, 224],
    ) -> None:
        super().__init__()

        self.in_sequence = in_sequence
        self.chars_center_coords_padded = chars_center_coords_padded
        self.out_categories = out_categories
        self.trialslist = trialslist
        self.padding_list = padding_list
        self.padding_at_end = padding_at_end
        self.return_images_for_conv = return_images_for_conv
        self.input_im_shape = input_im_shape
        if return_images_for_conv:
            self.im_partial_string = im_partial_string
            # Derive the image path for each trial from its plot file name.
            self.plot_files = [
                str(x["plot_file"]).replace("fixations_words", im_partial_string) for x in self.trialslist
            ]

    def _attention_mask(self, index):
        """Build a 0/1 mask over the sequence; zeros mark the padded positions."""
        mask = t.ones(self.in_sequence[index].shape[:-1], dtype=t.long)
        pad = self.padding_list[index]
        if self.padding_at_end:
            if pad > 0:
                mask[-pad:] = 0
        else:
            mask[:pad] = 0
        return mask

    def _load_image(self, index):
        """Open, resize (if needed) and ImageNet-normalise the stimulus image."""
        im = Image.open(self.plot_files[index])
        if [im.size[1], im.size[0]] != self.input_im_shape:
            im = tvfunc.resize(im, self.input_im_shape)
        return tvfunc.normalize(tvfunc.to_tensor(im), IMAGENET_MEAN, IMAGENET_STD)

    def __getitem__(self, index):
        # Assemble the optional parts in the canonical order documented above.
        parts = [self.in_sequence[index]]
        if self.chars_center_coords_padded is not None:
            parts.append(self.chars_center_coords_padded[index])
        if self.return_images_for_conv:
            parts.append(self._load_image(index))
        if self.padding_list is not None:
            parts.append(self._attention_mask(index))
        parts.append(self.out_categories[index])
        return tuple(parts)

    def __len__(self):
        # Tensors carry their length in the leading dimension; anything else
        # (e.g. a list of variable-length sequences) uses len().
        if isinstance(self.in_sequence, t.Tensor):
            return self.in_sequence.shape[0]
        return len(self.in_sequence)
201
+
202
+
203
def download_url(url, target_filename):
    """Download *url* and write the response body to *target_filename*.

    Returns 0 unconditionally (kept for backward compatibility with callers
    that check the return value).
    """
    r = requests.get(url)
    # Fixed: use a context manager so the file handle is closed deterministically
    # (the original `open(...).write(...)` left closing to the garbage collector).
    with open(target_filename, "wb") as f:
        f.write(r.content)
    return 0
207
+
208
+
209
def asc_to_trial_ids(asc_file, close_gap_between_words=True):
    """Parse an .asc eye-tracking file and key its paragraph trials by trial id.

    Returns (trials_by_ids, lines) as produced by file_to_trials_and_lines.
    """
    if "logger" in st.session_state:
        st.session_state["logger"].debug("asc_to_trial_ids entered")
    asc_encoding = "ISO-8859-15"  # first of the supported encodings (UTF-8 is the alternative)
    trials_dict, lines = file_to_trials_and_lines(
        asc_file, asc_encoding, close_gap_between_words=close_gap_between_words
    )

    # Index the paragraph trials by their trial_id field.
    trials_by_ids = {}
    for idx in trials_dict["paragraph_trials"]:
        trial = trials_dict[idx]
        trials_by_ids[trial["trial_id"]] = trial

    if hasattr(asc_file, "name") and "logger" in st.session_state:
        st.session_state["logger"].info(f"Found {len(trials_by_ids)} trials in {asc_file.name}.")
    return trials_by_ids, lines
222
+
223
+
224
def get_trials_list(asc_file=None, close_gap_between_words=True):
    """Resolve an asc file (argument or Streamlit session state) into its trials.

    Returns (trial_keys, trials_by_ids, lines, asc_file), or None when no
    asc file is available.
    """
    if "logger" in st.session_state:
        st.session_state["logger"].debug("get_trials_list entered")

    # Fixed: identity comparison with None instead of `== None` (PEP 8); also
    # membership test directly on the mapping instead of `.keys()`.
    if asc_file is None:
        if "single_asc_file" in st.session_state and st.session_state["single_asc_file"] is not None:
            asc_file = st.session_state["single_asc_file"]
        else:
            if "logger" in st.session_state:
                st.session_state["logger"].warning("Asc file is None")
            return None

    if hasattr(asc_file, "name"):
        if "logger" in st.session_state:
            st.session_state["logger"].info(f"get_trials_list entered with asc_file {asc_file.name}")

    trials_by_ids, lines = asc_to_trial_ids(asc_file, close_gap_between_words=close_gap_between_words)
    trial_keys = list(trials_by_ids.keys())

    return trial_keys, trials_by_ids, lines, asc_file
244
+
245
+
246
def save_trial_to_json(trial, savename):
    """Serialise *trial* to *savename* as UTF-8 JSON, dropping the dffix frame.

    The "dffix" entry (a DataFrame) is removed before dumping because it is
    not JSON-serialisable and is exported separately as CSV.
    """
    trial.pop("dffix", None)
    with open(savename, "w", encoding="utf-8") as fp:
        json.dump(trial, fp, ensure_ascii=False, indent=4, cls=NumpyEncoder)
251
+
252
+
253
def export_csv(dffix, trial):
    """Write the fixation dataframe (CSV) and trial metadata (JSON) to TEMP_FOLDER.

    Returns (csv_name, trial_name), the two file paths that were written.
    """
    # Some callers hand over a {"value": DataFrame} wrapper; unwrap it first.
    if isinstance(dffix, dict):
        dffix = dffix["value"]
    stem = TEMP_FOLDER.joinpath(pl.Path(trial["fname"]).stem)
    trial_id = trial["trial_id"]
    trial_name = f"{stem}_{trial_id}_trial_info.json"
    csv_name = f"{stem}_{trial_id}.csv"

    dffix.to_csv(csv_name)
    if "logger" in st.session_state:
        st.session_state["logger"].info(f"Saved processed data as {csv_name}")

    save_trial_to_json(trial, trial_name)
    if "logger" in st.session_state:
        st.session_state["logger"].info(f"Saved processed trial data as {trial_name}")

    return csv_name, trial_name
268
+
269
+
270
def get_all_classic_preds(dffix, trial, classic_algos_cfg):
    """Run every classic correction algorithm over *dffix*.

    Returns the dataframe (augmented with one ``y_<algo>`` column per
    algorithm) and the list of corrected y arrays in config order.
    """
    corrections = []
    # Deep-copy the config so algorithms cannot mutate the shared settings.
    for algo_name, params in copy.deepcopy(classic_algos_cfg).items():
        dffix = calgo.apply_classic_algo(dffix, trial, algo_name, params)
        corrections.append(np.asarray(dffix.loc[:, f"y_{algo_name}"]))
    return dffix, corrections
276
+
277
+
278
def apply_woc(dffix, trial, corrections, algo_choice):
    """Combine *corrections* via wisdom-of-the-crowd and store the result.

    Adds three columns to *dffix*: the corrected y values, the rounded
    per-fixation correction, and the resulting line number index.
    """
    corrected_Y = calgo.wisdom_of_the_crowd(corrections)
    col = f"y_{algo_choice}"
    dffix.loc[:, col] = corrected_Y
    dffix[f"{col}_correction"] = (dffix.loc[:, col] - dffix.loc[:, "y"]).round(1)
    # Map each corrected y back to its line index within the trial.
    dffix.loc[:, f"line_num_{col}"] = [trial["y_char_unique"].index(y) for y in corrected_Y]
    return dffix
286
+
287
+
288
def calc_xdiff_ydiff(line_xcoords_no_pad, line_ycoords_no_pad, line_heights, allow_multiple_values=False):
    """Estimate the horizontal and vertical spacing between text lines.

    Parameters
    ----------
    line_xcoords_no_pad : sequence of line x coordinates.
    line_ycoords_no_pad : sequence of line y coordinates.
    line_heights : sequence of line heights; the first entry is used as the
        vertical spacing when all lines share a single y coordinate.
    allow_multiple_values : when True, return the full array of distinct
        spacings instead of collapsing to the minimum.

    Returns
    -------
    (x_diff, y_diff) : scalars, or arrays of distinct diffs when
    ``allow_multiple_values`` is True.
    """
    x_diffs = np.unique(np.diff(line_xcoords_no_pad))
    if len(x_diffs) == 1:
        x_diff = x_diffs[0]
    elif len(x_diffs) == 0 and not allow_multiple_values:
        # Fixed: a single x coordinate produced an empty array and np.min
        # crashed; mirror the y-branch below and report zero spacing.
        x_diff = 0
    elif not allow_multiple_values:
        x_diff = np.min(x_diffs)
    else:
        x_diff = x_diffs

    # All lines at the same y: fall back to the first line height.
    if np.unique(line_ycoords_no_pad).shape[0] == 1:
        return x_diff, line_heights[0]
    y_diffs = np.unique(np.diff(line_ycoords_no_pad))
    if len(y_diffs) == 1:
        y_diff = y_diffs[0]
    elif len(y_diffs) == 0:
        y_diff = 0
    elif not allow_multiple_values:
        y_diff = np.min(y_diffs)
    else:
        y_diff = y_diffs
    return x_diff, y_diff
309
+
310
+
311
def add_words(trial, close_gap_between_words=True):
    """Reconstruct word bounding boxes from a trial's per-character boxes.

    Walks trial["chars_list"] and closes the current word whenever a
    separator character, a leftward x jump (line wrap), or the final
    character is reached.  Optionally widens adjacent word boxes on the same
    line so they meet halfway across the separating gap.

    Returns the list of word dicts (text, box corners, centre, line index).
    """
    chars_list_reconstructed = []
    words_list = []
    word_start_idx = 0
    chars_df = pd.DataFrame(trial["chars_list"])
    chars_df["char_width"] = chars_df.char_xmax - chars_df.char_xmin
    # Mean width of the space characters.
    # NOTE(review): space_width is computed but never used below.
    space_width = chars_df.loc[chars_df["char"] == " ", "char_width"].mean()

    for idx, char_dict in enumerate(trial["chars_list"]):
        on_line_num = char_dict["assigned_line"]  # NOTE(review): never read
        chars_list_reconstructed.append(char_dict)
        # A word closes on separators, on the x coordinate moving left
        # (wrap to a new line), or on the trial's final character.
        if (
            char_dict["char"] in [" ", ",", ";", ".", ":"]
            or (
                len(chars_list_reconstructed) > 2
                and (chars_list_reconstructed[-1]["char_xmin"] < chars_list_reconstructed[-2]["char_xmin"])
            )
            or len(chars_list_reconstructed) == len(trial["chars_list"])
        ):
            triggered = True  # NOTE(review): flag is never read afterwards
            word_xmin = chars_list_reconstructed[word_start_idx]["char_xmin"]
            # The word box ends at the char before the one that closed it.
            word_xmax = chars_list_reconstructed[-2]["char_xmax"]
            word_ymin = chars_list_reconstructed[word_start_idx]["char_ymin"]
            word_ymax = chars_list_reconstructed[word_start_idx]["char_ymax"]
            word_x_center = (word_xmax - word_xmin) / 2 + word_xmin
            word_y_center = (word_ymax - word_ymin) / 2 + word_ymin
            # NOTE: the comprehension's idx shadows the loop variable.
            word = "".join(
                [
                    chars_list_reconstructed[idx]["char"]
                    for idx in range(word_start_idx, len(chars_list_reconstructed) - 1)
                ]
            )
            assigned_line = chars_list_reconstructed[word_start_idx]["assigned_line"]

            word_dict = dict(
                word=word,
                word_xmin=word_xmin,
                word_xmax=word_xmax,
                word_ymin=word_ymin,
                word_ymax=word_ymax,
                word_x_center=word_x_center,
                word_y_center=word_y_center,
                assigned_line=assigned_line,
            )
            # Next word starts at the closing char itself unless it was a space.
            if char_dict["char"] != " ":
                word_start_idx = idx
            else:
                word_start_idx = idx + 1
            words_list.append(word_dict)
    else:
        # NOTE(review): indentation reconstructed as a for/else (runs once
        # after the loop completes; there is no break).  word_dict and
        # assigned_line refer to the last word that was closed — confirm.
        triggered = False
        # If the trial's final character did not end up as the last letter of
        # the last word, append it as a one-character word of its own.
        last_letter_in_word = word_dict["word"][-1]
        last_letter_in_chars_list_reconstructed = char_dict["char"]
        if last_letter_in_word != last_letter_in_chars_list_reconstructed:
            word_dict = dict(
                word=char_dict["char"],
                word_xmin=char_dict["char_xmin"],
                word_xmax=char_dict["char_xmax"],
                word_ymin=char_dict["char_ymin"],
                word_ymax=char_dict["char_ymax"],
                word_x_center=char_dict["char_x_center"],
                word_y_center=char_dict["char_y_center"],
                assigned_line=assigned_line,
            )
            words_list.append(word_dict)

    if close_gap_between_words:
        # Widen neighbouring boxes on the same line to meet mid-gap.
        for widx in range(1, len(words_list)):
            if words_list[widx]["assigned_line"] == words_list[widx - 1]["assigned_line"]:
                word_sep_half_width = (words_list[widx]["word_xmin"] - words_list[widx - 1]["word_xmax"]) / 2
                words_list[widx - 1]["word_xmax"] = words_list[widx - 1]["word_xmax"] + word_sep_half_width
                words_list[widx]["word_xmin"] = words_list[widx]["word_xmin"] - word_sep_half_width

    return words_list
385
+
386
+
387
def asc_lines_to_trials_by_trail_id(
    lines: list, paragraph_trials_only=False, fname: str = "", close_gap_between_words=True
) -> dict:
    """Parse the lines of an EyeLink .asc file into a per-trial dictionary.

    First pass: split ``lines`` into trials on TRIALID / TRIAL_RESULT markers,
    classify each trial by the first letter of its id (F=question,
    P=practice, else paragraph), and pick up global metadata (display
    coordinates, framerate).  Second pass: for every kept trial, extract
    per-character stimulus boxes from REGION CHAR lines, assign characters to
    text lines, compute grid spacing and derive word boxes via add_words().

    Returns a dict keyed by trial index plus bookkeeping keys
    (display_coords, fps, max_trial_idx, paragraph_trials, ...).
    """
    if hasattr(fname, "name"):
        fname = fname.name
    fps = -999  # sentinel: framerate not found yet
    display_coords = -999  # int sentinel; replaced by a 4-tuple when found
    trials_dict = dict(paragraph_trials=[], paragraph_trial_IDs=[])
    trial_idx = -1
    removed_trial_ids = []
    for idx, l in enumerate(lines):
        parts = l.strip().split(" ")
        if "TRIALID" in l:
            trial_id = parts[-1]
            trial_idx += 1
            # The first letter of the trial id encodes the trial type.
            if trial_id[0] == "F":
                trial_is = "question"
            elif trial_id[0] == "P":
                trial_is = "practice"
            else:
                trial_is = "paragraph"
                trials_dict["paragraph_trials"].append(trial_idx)
                trials_dict["paragraph_trial_IDs"].append(trial_id)
            trials_dict[trial_idx] = dict(trial_id=trial_id, trial_id_idx=idx, trial_is=trial_is, filename=fname)
            last_trial_skipped = False

        elif "TRIAL_RESULT" in l or "stop_trial" in l:
            trials_dict[trial_idx]["trial_result_idx"] = idx
            trials_dict[trial_idx]["trial_result_timestamp"] = int(parts[0].split("\t")[1])
            if len(parts) > 2:
                trials_dict[trial_idx]["trial_result_number"] = int(parts[2])
        elif "DISPLAY COORDS" in l and isinstance(display_coords, int):
            # Only the first occurrence is kept (the sentinel is an int).
            display_coords = (float(parts[-4]), float(parts[-3]), float(parts[-2]), float(parts[-1]))
        elif "GAZE_COORDS" in l and isinstance(display_coords, int):
            display_coords = (float(parts[-4]), float(parts[-3]), float(parts[-2]), float(parts[-1]))
        elif "FRAMERATE" in l:
            # metadata_strs is a module-level constant not visible in this
            # chunk; metadata_strs[2] is presumably the FRAMERATE token —
            # TODO confirm.
            l_idx = parts.index(metadata_strs[2])
            fps = float(parts[l_idx + 1])
        elif "TRIAL ABORTED" in l or "TRIAL REPEATED" in l:
            if not last_trial_skipped:
                if trial_is == "paragraph":
                    trials_dict["paragraph_trials"].remove(trial_idx)
                # Reuse this index so the repeated run overwrites the trial.
                trial_idx -= 1
                removed_trial_ids.append(trial_id)
                last_trial_skipped = True

    if paragraph_trials_only:
        trials_dict_temp = trials_dict.copy()
        # NOTE(review): this keep-list drops "paragraph_trial_IDs" as well —
        # confirm that is intended.
        for k in trials_dict_temp.keys():
            if k not in ["paragraph_trials"] + trials_dict_temp["paragraph_trials"]:
                trials_dict.pop(k)
        if len(trials_dict_temp["paragraph_trials"]):
            trial_idx = trials_dict_temp["paragraph_trials"][-1]
        else:
            return trials_dict
    trials_dict["display_coords"] = display_coords
    trials_dict["fps"] = fps
    trials_dict["max_trial_idx"] = trial_idx
    enum = trials_dict["paragraph_trials"] if "paragraph_trials" in trials_dict.keys() else range(len(trials_dict))
    for trial_idx in enum:
        if trial_idx not in trials_dict.keys():
            continue
        chars_list = []
        if "display_coords" not in trials_dict[trial_idx].keys():
            trials_dict[trial_idx]["display_coords"] = trials_dict["display_coords"]
        trial_start_idx = trials_dict[trial_idx]["trial_id_idx"]
        trial_end_idx = trials_dict[trial_idx]["trial_result_idx"]
        trial_lines = lines[trial_start_idx:trial_end_idx]
        for idx, l in enumerate(trial_lines):
            parts = l.strip().split(" ")
            if "START" in l and " MSG" not in l:
                # Samples begin a fixed 7 lines after the START marker.
                trials_dict[trial_idx]["start_idx"] = trial_start_idx + idx + 7
                trials_dict[trial_idx]["start_time"] = int(parts[0].split("\t")[1])
            elif "END" in l and "ENDBUTTON" not in l and " MSG" not in l:
                trials_dict[trial_idx]["end_idx"] = trial_start_idx + idx - 2
                trials_dict[trial_idx]["end_time"] = int(parts[0].split("\t")[1])
            elif "SYNCTIME" in l:
                trials_dict[trial_idx]["synctime"] = trial_start_idx + idx
                trials_dict[trial_idx]["synctime_time"] = int(parts[0].split("\t")[1])
            elif "GAZE TARGET OFF" in l:
                trials_dict[trial_idx]["gaze_targ_off_time"] = int(parts[0].split("\t")[1])
            elif "GAZE TARGET ON" in l:
                trials_dict[trial_idx]["gaze_targ_on_time"] = int(parts[0].split("\t")[1])
            elif "DISPLAY_SENTENCE" in l:  # some .asc files seem to use this
                trials_dict[trial_idx]["gaze_targ_on_time"] = int(parts[0].split("\t")[1])
            elif "REGION CHAR" in l:
                rg_idx = parts.index("CHAR")
                # Three layouts occur: extra token (the char is a space),
                # a truncated line whose coordinates spill onto the next
                # line, and the normal single-line case.
                if len(parts[rg_idx:]) > 8:
                    char = " "
                    idx_correction = 1
                elif len(parts[rg_idx:]) == 3:
                    char = " "
                    if "REGION CHAR" not in trial_lines[idx + 1]:
                        # Continue parsing from the spill-over line.
                        parts = trial_lines[idx + 1].strip().split(" ")
                        idx_correction = -rg_idx - 4
                else:
                    char = parts[rg_idx + 3]
                    idx_correction = 0
                try:
                    char_dict = {
                        "char": char,
                        "char_xmin": float(parts[rg_idx + 4 + idx_correction]),
                        "char_ymin": float(parts[rg_idx + 5 + idx_correction]),
                        "char_xmax": float(parts[rg_idx + 6 + idx_correction]),
                        "char_ymax": float(parts[rg_idx + 7 + idx_correction]),
                    }
                    char_dict["char_y_center"] = (char_dict["char_ymax"] - char_dict["char_ymin"]) / 2 + char_dict[
                        "char_ymin"
                    ]
                    char_dict["char_x_center"] = (char_dict["char_xmax"] - char_dict["char_xmin"]) / 2 + char_dict[
                        "char_xmin"
                    ]
                    chars_list.append(char_dict)
                except Exception as e:
                    # Malformed REGION CHAR lines are skipped with a warning.
                    if "logger" in st.session_state:
                        st.session_state["logger"].warning(f"char_dict creation failed for parts {parts}")
                    if "logger" in st.session_state:
                        st.session_state["logger"].warning(e)

        # Trial time zero: gaze-target onset when present, else START time.
        if "gaze_targ_on_time" in trials_dict[trial_idx]:
            trials_dict[trial_idx]["trial_start_time"] = trials_dict[trial_idx]["gaze_targ_on_time"]
        else:
            trials_dict[trial_idx]["trial_start_time"] = trials_dict[trial_idx]["start_time"]

        if len(chars_list) > 0:
            # Assign each character to a text line via its vertical centre.
            line_ycoords = []
            for idx in range(len(chars_list)):
                chars_list[idx]["char_line_y"] = (
                    chars_list[idx]["char_ymax"] - chars_list[idx]["char_ymin"]
                ) / 2 + chars_list[idx]["char_ymin"]
                if chars_list[idx]["char_line_y"] not in line_ycoords:
                    line_ycoords.append(chars_list[idx]["char_line_y"])
            for idx in range(len(chars_list)):
                chars_list[idx]["assigned_line"] = line_ycoords.index(chars_list[idx]["char_line_y"])

            line_heights = [x["char_ymax"] - x["char_ymin"] for x in chars_list]
            line_xcoords_all = [x["char_x_center"] for x in chars_list]
            line_xcoords_no_pad = np.unique(line_xcoords_all)

            line_ycoords_all = [x["char_y_center"] for x in chars_list]
            line_ycoords_no_pad = np.unique(line_ycoords_all)

            trials_dict[trial_idx]["x_char_unique"] = list(line_xcoords_no_pad)
            trials_dict[trial_idx]["y_char_unique"] = list(line_ycoords_no_pad)
            x_diff, y_diff = calc_xdiff_ydiff(
                line_xcoords_no_pad, line_ycoords_no_pad, line_heights, allow_multiple_values=False
            )
            trials_dict[trial_idx]["x_diff"] = float(x_diff)
            trials_dict[trial_idx]["y_diff"] = float(y_diff)
            trials_dict[trial_idx]["num_char_lines"] = len(line_ycoords_no_pad)
            trials_dict[trial_idx]["line_heights"] = line_heights
            trials_dict[trial_idx]["chars_list"] = chars_list

            words_list = add_words(trials_dict[trial_idx], close_gap_between_words=close_gap_between_words)
            trials_dict[trial_idx]["words_list"] = words_list

    return trials_dict
544
+
545
+
546
def file_to_trials_and_lines(uploaded_file, asc_encoding: str = "ISO-8859-15", close_gap_between_words=True):
    """Read an .asc file (path or upload object) and parse it into trials.

    Returns (trials_dict, lines).  Paragraph-only filtering is always passed
    to asc_lines_to_trials_by_trail_id; afterwards each paragraph trial with
    characters also gets its per-line sentences and the full stimulus text.
    """
    # Accept either a filesystem path or an uploaded file-like object.
    if isinstance(uploaded_file, str) or isinstance(uploaded_file, pl.Path):
        with open(uploaded_file, "r", encoding=asc_encoding) as f:
            lines = f.readlines()
    else:
        stringio = StringIO(uploaded_file.getvalue().decode(asc_encoding))
        loaded_str = stringio.read()
        lines = loaded_str.split("\n")
    trials_dict = asc_lines_to_trials_by_trail_id(
        lines, True, uploaded_file, close_gap_between_words=close_gap_between_words
    )

    # Fallback: rebuild the paragraph index if the parser did not provide it.
    if "paragraph_trials" not in trials_dict.keys() and "trial_is" in trials_dict[0].keys():
        paragraph_trials = []
        for k in range(trials_dict["max_trial_idx"]):
            if trials_dict[k]["trial_is"] == "paragraph":
                paragraph_trials.append(k)
        trials_dict["paragraph_trials"] = paragraph_trials

    enum = (
        trials_dict["paragraph_trials"]
        if "paragraph_trials" in trials_dict.keys()
        else range(trials_dict["max_trial_idx"])
    )
    for k in enum:
        if "chars_list" in trials_dict[k].keys():
            max_line = trials_dict[k]["chars_list"][-1]["assigned_line"]
            # Collect each line's characters, then join them into sentences.
            words_on_lines = {x: [] for x in range(max_line + 1)}
            [words_on_lines[x["assigned_line"]].append(x["char"]) for x in trials_dict[k]["chars_list"]]
            sentence_list = ["".join([s for s in v]) for idx, v in words_on_lines.items()]
            # NOTE(review): no "\n" is inserted between the first and second
            # sentence (lines a,b,c give "ab\nc") — confirm whether
            # "\n".join(sentence_list) was intended.
            text = sentence_list[0] + "\n".join([x for x in sentence_list[1:]])
            trials_dict[k]["sentence_list"] = sentence_list
            trials_dict[k]["text"] = text
            trials_dict[k]["max_line"] = max_line

    return trials_dict, lines
582
+
583
+
584
def get_plot_props(trial, available_fonts):
    """Resolve font, font size, dpi and screen resolution for plotting a trial.

    Falls back to "DejaVu Sans Mono" when the trial's font is missing or not
    installed, to size 21 when the trial carries no font info, and to
    1920x1080 when it has no display coordinates.
    """
    fallback_font = "DejaVu Sans Mono"
    if "font" in trial:
        font_size = trial["font_size"]
        font = trial["font"] if trial["font"] in available_fonts else fallback_font
    else:
        font, font_size = fallback_font, 21
    dpi = 100
    if "display_coords" in trial:
        screen_res = (trial["display_coords"][2], trial["display_coords"][3])
    else:
        screen_res = (1920, 1080)
    return font, font_size, dpi, screen_res
599
+
600
+
601
def trial_to_dfs(
    trial: dict, lines: list, use_synctime: bool = False, save_lines_to_txt=False, cut_out_outer_fixations=False
):
    """Extract raw gaze samples and fixation events for one trial.

    ``trial`` is the per-trial dict produced by the .asc parsing (provides
    the slice indices and trial_start_time); ``lines`` are the raw .asc
    lines.  The recorded eye (L/R) is auto-detected from the first EFIX
    event; samples inside blinks and event-marker lines are skipped.

    Returns (samples_df, fixations_df, trial); fixations_df is empty when
    the trial contains no completed fixations.

    Fix over the previous revision: the no-fixations branch held a dangling
    tuple expression (``df, pd.DataFrame(), trial``) instead of a return, so
    execution fell through and crashed on ``dffix.x`` when
    ``cut_out_outer_fixations`` was set on an empty dataframe.  The outer
    filter is now applied only when fixations exist, and the counters are
    always written to ``trial``.
    """
    # Choose the sample window: SYNCTIME onwards, or the START/END markers.
    if use_synctime and "synctime" in trial:
        idx0, idxend = trial["synctime"] + 1, trial["trial_result_idx"]
    else:
        idx0, idxend = trial["start_idx"], trial["end_idx"]

    line_dicts = []
    fixations_dicts = []
    blink_started = False

    fixation_started = False
    efix_count = 0
    sfix_count = 0
    sblink_count = 0

    if save_lines_to_txt:
        # Debug helper: dump the window plus 500 lines of context each side.
        with open("Lines_plus500.txt", "w") as f:
            f.writelines(lines[idx0 - 500 : idxend + 500])

    # Detect which eye was recorded from the first fixation-end event.
    eye_to_use = "R"
    for l in lines[idx0 : idxend + 1]:
        if "EFIX R" in l:
            eye_to_use = "R"
            break
        elif "EFIX L" in l:
            eye_to_use = "L"
            break

    for l in lines[idx0 : idxend + 1]:
        parts = [x.strip() for x in l.split("\t")]
        if f"EFIX {eye_to_use}" in l:
            efix_count += 1
            if fixation_started:
                if parts[1] == "." and parts[2] == ".":
                    continue  # fixation event with missing data
                fixations_dicts.append(
                    {
                        "start_time": float(parts[0].split()[-1].strip()),
                        "end_time": float(parts[1].strip()),
                        "duration": float(parts[2].strip()),
                        "x": float(parts[3].strip()),
                        "y": float(parts[4].strip()),
                        "pupil_size": float(parts[5].strip()),
                    }
                )
                if len(fixations_dicts) >= 2:
                    assert (
                        fixations_dicts[-1]["start_time"] > fixations_dicts[-2]["start_time"]
                    ), "start times not in order"
                fixation_started = False

        elif f"SFIX {eye_to_use}" in l:
            sfix_count += 1
            fixation_started = True
        elif f"SBLINK {eye_to_use}" in l:
            sblink_count += 1
            blink_started = True
        # Raw samples: any line outside a blink that is not an event marker
        # (event_strs is a module-level list of marker substrings).
        if not blink_started and not any([True for x in event_strs if x in l]):
            if len(parts) < 3 or (parts[1] == "." and parts[2] == "."):
                continue
            line_dicts.append(
                {
                    "idx": float(parts[0].strip()),
                    "x": float(parts[1].strip()),
                    "y": float(parts[2].strip()),
                    "p": float(parts[3].strip()),
                }
            )

        elif f"EBLINK {eye_to_use}" in l:
            blink_started = False

    df = pd.DataFrame(line_dicts)
    dffix = pd.DataFrame(fixations_dicts)
    if len(fixations_dicts) > 0:
        dffix["corrected_start_time"] = dffix.start_time - trial["trial_start_time"]
        dffix["corrected_end_time"] = dffix.end_time - trial["trial_start_time"]
        dffix["fix_duration"] = dffix.corrected_end_time.values - dffix.corrected_start_time.values
        assert all(np.diff(dffix["corrected_start_time"]) > 0), "start times not in order"
        if cut_out_outer_fixations:
            # Drop fixations far outside the stimulus area (hard-coded bounds).
            dffix = dffix[(dffix.x > -10) & (dffix.y > -10) & (dffix.x < 1050) & (dffix.y < 800)]
    trial["efix_count"] = efix_count
    trial["eye_to_use"] = eye_to_use
    trial["sfix_count"] = sfix_count
    trial["sblink_count"] = sblink_count
    return df, dffix, trial
693
+
694
+
695
def get_save_path(fpath, fname_ending):
    """Return ``<gradio_plots>/<fpath stem>_<fname_ending>.png``."""
    return gradio_plots.joinpath(f"{fpath.stem}_{fname_ending}.png")
698
+
699
+
700
def save_im_load_convert(fpath, fig, fname_ending, mode):
    """Save ``fig`` as a PNG, reload it converted to ``mode``, and re-save.

    ``mode`` is a PIL mode string (e.g. "L" for greyscale, "RGB").
    Returns the converted PIL image.
    """
    out_path = get_save_path(fpath, fname_ending)
    fig.savefig(out_path)
    converted = Image.open(out_path).convert(mode)
    converted.save(out_path)
    return converted
706
+
707
+
708
def get_fig_ax(screen_res, dpi, words_df, x_margin, y_margin, dffix=None, prefix="word"):
    """Create a borderless, full-bleed figure/axes pair sized to the screen.

    Axis limits come from the fixation extremes when ``dffix`` is given,
    otherwise from the word/char centre extremes padded by the margins.
    The y-axis is inverted to match screen coordinates.
    """
    fig = plt.figure(figsize=(screen_res[0] / dpi, screen_res[1] / dpi), dpi=dpi)
    ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0])
    ax.set_axis_off()
    if dffix is None:
        y_vals = words_df[f"{prefix}_y_center"]
        x_vals = words_df[f"{prefix}_x_center"]
        ax.set_ylim((y_vals.min() - y_margin, y_vals.max() + y_margin))
        ax.set_xlim((x_vals.min() - x_margin, x_vals.max() + x_margin))
    else:
        ax.set_ylim((dffix.y.min(), dffix.y.max()))
        ax.set_xlim((dffix.x.min(), dffix.x.max()))
    ax.invert_yaxis()
    fig.add_axes(ax)
    return fig, ax
721
+
722
+
723
def plot_text_boxes_fixations(
    fpath,
    dpi,
    screen_res,
    data_dir_sub,
    set_font_size: bool,
    font_size: int,
    use_words: bool,
    save_channel_repeats: bool,
    save_combo_grey_and_rgb: bool,
    dffix=None,
    trial=None,
):
    """Render fixations and word/char boxes to a set of PNGs next to ``fpath``.

    Produces a 3-channel image whose channels are (text, filled boxes,
    fixation scatter), optionally an RGB + grey combo and per-channel repeat
    images.  ``dffix``/``trial`` are loaded from ``fpath`` (fixations csv and
    sibling ``_trial.json``) when not supplied.  ``data_dir_sub`` is kept for
    interface compatibility but unused here.

    Fixes over the previous revision:
    - ``font`` is now always defined (previously unbound — NameError — when
      ``set_font_size`` was False).
    - the channel-separated combo image is saved to its computed
      ``save_path`` instead of overwriting the input ``fpath``.
    """
    if isinstance(fpath, str):
        fpath = pl.Path(fpath)
    prefix = "word" if use_words else "char"
    if dffix is None:
        dffix = pd.read_csv(fpath)
    if trial is None:
        json_fpath = str(fpath).replace("_fixations.csv", "_trial.json")
        with open(json_fpath, "r") as f:
            trial = json.load(f)
    words_df = pd.DataFrame(trial[f"{prefix}s_list"])
    # Box edges (boxes are drawn with a one pixel offset below).
    x_lefts = words_df[f"{prefix}_xmin"]
    x_rights = words_df[f"{prefix}_xmax"]
    y_tops = words_df[f"{prefix}_ymax"]
    y_bottoms = words_df[f"{prefix}_ymin"]

    if f"{prefix}_x_center" not in words_df.columns:
        words_df[f"{prefix}_x_center"] = (words_df[f"{prefix}_xmax"] - words_df[f"{prefix}_xmin"]) / 2 + words_df[
            f"{prefix}_xmin"
        ]
        words_df[f"{prefix}_y_center"] = (words_df[f"{prefix}_ymax"] - words_df[f"{prefix}_ymin"]) / 2 + words_df[
            f"{prefix}_ymin"
        ]

    x_margin = words_df[f"{prefix}_x_center"].mean() / 8
    y_margin = words_df[f"{prefix}_y_center"].mean() / 4
    # Fade older fixations: alpha ramps linearly from 0.25 to 1 in time order.
    times = dffix.corrected_start_time - dffix.corrected_start_time.min()
    times = times / times.max()
    times = np.linspace(0.25, 1, len(times))

    font = "monospace"  # fix: previously only bound when set_font_size was True
    if not set_font_size:
        font_size = trial["font_size"] * 27 // dpi

    font_props = FontProperties(family=font, style="normal", size=font_size)
    if save_combo_grey_and_rgb:
        # RGB combo: blue fixations, red boxes, green text.
        fig, ax = get_fig_ax(screen_res, dpi, words_df, x_margin, y_margin, prefix=prefix)
        ax.scatter(dffix.x, dffix.y, alpha=times, facecolor="b")
        for idx in range(len(x_lefts)):
            box_w = x_rights[idx] - x_lefts[idx]
            box_h = y_tops[idx] - y_bottoms[idx]
            rect = patches.Rectangle(
                (x_lefts[idx] - 1, y_bottoms[idx] - 1),
                box_w,
                box_h,
                alpha=0.9,
                linewidth=0.8,
                edgecolor="r",
                facecolor="none",
            )  # seems to need one pixel offset
            ax.text(
                words_df[f"{prefix}_x_center"][idx],
                words_df[f"{prefix}_y_center"][idx],
                words_df[prefix][idx],
                horizontalalignment="center",
                verticalalignment="center",
                fontproperties=font_props,
                color="g",
            )
            ax.add_patch(rect)
        fname_ending = f"{prefix}s_combo_rgb"
        words_combo_rgb_im = save_im_load_convert(fpath, fig, fname_ending, "RGB")
        plt.close("all")

        # Greyscale combo of boxes + text + fixations.
        fig, ax = get_fig_ax(screen_res, dpi, words_df, x_margin, y_margin, prefix=prefix)
        ax.scatter(dffix.x, dffix.y, facecolor="k", alpha=times)
        for idx in range(len(x_lefts)):
            box_w = x_rights[idx] - x_lefts[idx]
            box_h = y_tops[idx] - y_bottoms[idx]
            rect = patches.Rectangle(
                (x_lefts[idx] - 1, y_bottoms[idx] - 1),
                box_w,
                box_h,
                alpha=0.9,
                linewidth=0.8,
                edgecolor="k",
                facecolor="none",
            )  # seems to need one pixel offset
            ax.text(
                words_df[f"{prefix}_x_center"][idx],
                words_df[f"{prefix}_y_center"][idx],
                words_df[prefix][idx],
                horizontalalignment="center",
                verticalalignment="center",
                fontproperties=font_props,
            )
            ax.add_patch(rect)
        fname_ending = f"{prefix}s_combo_grey"
        words_combo_grey_im = save_im_load_convert(fpath, fig, fname_ending, "L")
        plt.close("all")

    # Channel 1: the text alone.
    fig, ax = get_fig_ax(screen_res, dpi, words_df, x_margin, y_margin, prefix=prefix)
    ax.scatter(words_df[f"{prefix}_x_center"], words_df[f"{prefix}_y_center"], s=1, facecolor="k", alpha=0.01)
    for idx in range(len(x_lefts)):
        ax.text(
            words_df[f"{prefix}_x_center"][idx],
            words_df[f"{prefix}_y_center"][idx],
            words_df[prefix][idx],
            horizontalalignment="center",
            verticalalignment="center",
            fontproperties=font_props,
        )
    fname_ending = f"{prefix}s_grey"
    words_grey_im = save_im_load_convert(fpath, fig, fname_ending, "L")

    plt.close("all")

    # Channel 2: filled boxes alone.
    fig, ax = get_fig_ax(screen_res, dpi, words_df, x_margin, y_margin, prefix=prefix)
    ax.scatter(words_df[f"{prefix}_x_center"], words_df[f"{prefix}_y_center"], s=1, facecolor="k", alpha=0.1)
    for idx in range(len(x_lefts)):
        box_w = x_rights[idx] - x_lefts[idx]
        box_h = y_tops[idx] - y_bottoms[idx]
        rect = patches.Rectangle(
            (x_lefts[idx] - 1, y_bottoms[idx] - 1), box_w, box_h, alpha=0.9, linewidth=1, edgecolor="k", facecolor="grey"
        )  # seems to need one pixel offset
        ax.add_patch(rect)
    fname_ending = f"{prefix}_boxes_grey"
    word_boxes_grey_im = save_im_load_convert(fpath, fig, fname_ending, "L")

    plt.close("all")

    # Channel 3: fixation scatter alone.
    fig, ax = get_fig_ax(screen_res, dpi, words_df, x_margin, y_margin, prefix=prefix)
    ax.scatter(dffix.x, dffix.y, facecolor="k", alpha=times)
    fname_ending = "fix_scatter_grey"
    fix_scatter_grey_im = save_im_load_convert(fpath, fig, fname_ending, "L")

    plt.close("all")

    # Stack the three greyscale renderings into one RGB-like image.
    arr_combo = np.stack(
        [
            np.asarray(words_grey_im),
            np.asarray(word_boxes_grey_im),
            np.asarray(fix_scatter_grey_im),
        ],
        axis=2,
    )

    im_combo = Image.fromarray(arr_combo)
    fname_ending = f"{prefix}s_channel_sep"

    save_path = get_save_path(fpath, fname_ending)
    print(f"save_path for im combo is {save_path}")
    im_combo.save(save_path)  # fix: previously overwrote the input fpath

    if save_channel_repeats:
        # Each channel repeated three times, saved as its own image.
        arr_combo = np.stack([np.asarray(words_grey_im)] * 3, axis=2)
        im_combo = Image.fromarray(arr_combo)
        fname_ending = f"{prefix}s_channel_repeat"

        save_path = get_save_path(fpath, fname_ending)
        im_combo.save(save_path)

        arr_combo = np.stack([np.asarray(word_boxes_grey_im)] * 3, axis=2)

        im_combo = Image.fromarray(arr_combo)
        fname_ending = f"{prefix}boxes_channel_repeat"

        save_path = get_save_path(fpath, fname_ending)
        im_combo.save(save_path)

        arr_combo = np.stack([np.asarray(fix_scatter_grey_im)] * 3, axis=2)

        im_combo = Image.fromarray(arr_combo)
        fname_ending = "fix_channel_repeat"

        save_path = get_save_path(fpath, fname_ending)
        im_combo.save(save_path)
909
+
910
+
911
def add_line_overlaps_to_sample(trial, sample):
    """Append a line-overlap feature column to ``sample``.

    For each fixation row, the y coordinate (column 1) is matched against the
    unique (char_ymin, char_ymax) bands of the trial's characters; the index
    of the first containing band becomes the feature, or -1.0 if none
    contains it.  Returns ``sample`` with the extra column concatenated.
    """
    char_df = pd.DataFrame(trial["chars_list"])
    # Unique keeps order of first appearance, so band index == line index.
    y_bands = list(zip(char_df.char_ymin.unique(), char_df.char_ymax.unique()))
    overlap_vals = []
    for row in sample:
        y = row[1]
        matched = -1
        for band_idx, (ymin, ymax) in enumerate(y_bands):
            if ymin <= y <= ymax:
                matched = band_idx
                break
        overlap_vals.append(t.tensor(matched, dtype=t.float32))
    overlap_col = t.stack(overlap_vals, dim=0).unsqueeze(1)
    return t.cat([sample, overlap_col], dim=1)
925
+
926
+
927
def norm_coords_by_letter_min_x_y(
    sample_idx: int,
    trialslist: list,
    samplelist: list,
    chars_center_coords_list: list = None,
):
    """Shift one sample so the text's top-left character corner is the origin.

    Subtracts the minimum char x from feature 0 and the minimum char y from
    feature 1 of ``samplelist[sample_idx]`` (other features untouched) and
    applies the same shift to the matching char-centre coordinates when
    provided.  Also caches the unique char x-minima on the trial dict.
    Returns the (mutated) trialslist, samplelist and centre list.
    """
    chars_df = pd.DataFrame(trialslist[sample_idx]["chars_list"])
    trialslist[sample_idx]["x_char_unique"] = chars_df.char_xmin.unique()

    sample = samplelist[sample_idx]
    offset = t.zeros((1, sample.shape[1]), dtype=sample.dtype, device=sample.device)
    offset[0, 0] = chars_df.char_xmin.min()
    offset[0, 1] = chars_df.char_ymin.min()

    samplelist[sample_idx] = sample - offset

    if chars_center_coords_list is not None:
        xy_offset = offset.squeeze(0)[:2]
        centers = chars_center_coords_list[sample_idx]
        # Box-corner coordinates (xmin,ymin,xmax,ymax) get the shift twice.
        if centers.shape[-1] == xy_offset.shape[-1] * 2:
            centers[:, :2] -= xy_offset
            centers[:, 2:] -= xy_offset
        else:
            centers -= xy_offset
    return trialslist, samplelist, chars_center_coords_list
955
+
956
+
957
def norm_coords_by_letter_positions(
    sample_idx: int,
    trialslist: list,
    samplelist: list,
    meanlist: list = None,
    stdlist: list = None,
    return_mean_std_lists=False,
    norm_by_char_averages=False,
    chars_center_coords_list: list = None,
    add_normalised_values_as_features=False,
):
    """Scale one sample's x/y features by the stimulus text dimensions.

    Feature 0 (x) is divided by the text line width (or average char width
    when ``norm_by_char_averages``) and feature 1 (y) by the minimum line
    height (or average char height).  Optionally the normalised values are
    appended as extra feature columns instead of replacing the originals,
    the matching char-centre coordinates are scaled too, and per-sample
    mean/std vectors are collected.

    Returns (trialslist, samplelist, meanlist, stdlist, centres) when
    ``return_mean_std_lists`` else (trialslist, samplelist, centres).

    Fixes over the previous revision: the NaN assertions used ``~`` on
    Python bools (``~True == -2`` is truthy, so they could never fire) and
    the second assertion re-checked ``mean_val`` instead of ``std_val``.
    """
    chars_df = pd.DataFrame(trialslist[sample_idx]["chars_list"])
    trialslist[sample_idx]["x_char_unique"] = chars_df.char_xmin.unique()

    min_x_chars = chars_df.char_xmin.min()
    max_x_chars = chars_df.char_xmax.max()

    norm_vector_multi = t.ones(
        (1, samplelist[sample_idx].shape[1]), dtype=samplelist[sample_idx].dtype, device=samplelist[sample_idx].device
    )
    if norm_by_char_averages:
        chars_list = trialslist[sample_idx]["chars_list"]
        char_widths = np.asarray([x["char_xmax"] - x["char_xmin"] for x in chars_list])
        char_heights = np.asarray([x["char_ymax"] - x["char_ymin"] for x in chars_list])
        # Zero-sized characters are excluded from the averages.
        char_widths_average = np.mean(char_widths[char_widths > 0])
        char_heights_average = np.mean(char_heights[char_heights > 0])

        norm_vector_multi[0, 0] = norm_vector_multi[0, 0] * char_widths_average
        norm_vector_multi[0, 1] = norm_vector_multi[0, 1] * char_heights_average

    else:
        line_height = min(np.unique(trialslist[sample_idx]["line_heights"]))
        line_width = max_x_chars - min_x_chars
        norm_vector_multi[0, 0] = norm_vector_multi[0, 0] * line_width
        norm_vector_multi[0, 1] = norm_vector_multi[0, 1] * line_height
    assert not t.any(t.isnan(norm_vector_multi)), "Nan found in char norming vector"

    norm_vector_multi = norm_vector_multi.squeeze(0)
    if add_normalised_values_as_features:
        # Keep only the entries that actually normalise something (!= 1).
        norm_vector_multi = norm_vector_multi[norm_vector_multi != 1]
        normed_features = samplelist[sample_idx][:, : norm_vector_multi.shape[0]] / norm_vector_multi
        samplelist[sample_idx] = t.cat([samplelist[sample_idx], normed_features], dim=1)
    else:
        samplelist[sample_idx] = samplelist[sample_idx] / norm_vector_multi  # in case time or pupil size is included
    if chars_center_coords_list is not None:
        norm_vector_multi = norm_vector_multi[:2]
        if chars_center_coords_list[sample_idx].shape[-1] == norm_vector_multi.shape[-1] * 2:
            chars_center_coords_list[sample_idx][:, :2] /= norm_vector_multi
            chars_center_coords_list[sample_idx][:, 2:] /= norm_vector_multi
        else:
            chars_center_coords_list[sample_idx] /= norm_vector_multi
    if return_mean_std_lists:
        mean_val = samplelist[sample_idx].mean(axis=0).cpu().numpy()
        meanlist.append(mean_val)
        std_val = samplelist[sample_idx].std(axis=0).cpu().numpy()
        stdlist.append(std_val)
        # Fixed: `not ...any()` instead of the always-truthy `~any(...)`,
        # and the second check now actually validates std_val.
        assert not np.isnan(mean_val).any(), "Nan found in mean_val"
        assert not np.isnan(std_val).any(), "Nan found in std_val"

        return trialslist, samplelist, meanlist, stdlist, chars_center_coords_list
    return trialslist, samplelist, chars_center_coords_list
1018
+
1019
+
1020
def remove_compile_from_model(model):
    """Unwrap torch.compile'd submodules of ``model`` back to their originals.

    When ``model.project`` carries an ``_orig_mod`` attribute, all six known
    submodules are replaced by their uncompiled originals; otherwise the
    model is left untouched and a message is printed.  Returns the model.
    """
    if not hasattr(model.project, "_orig_mod"):
        print(f"remove_compile_from_model not done since model.project {model.project} has no orig_mod")
        return model
    model.project = model.project._orig_mod
    model.chars_conv = model.chars_conv._orig_mod
    model.chars_classifier = model.chars_classifier._orig_mod
    model.layer_norm_in = model.layer_norm_in._orig_mod
    model.bert_model = model.bert_model._orig_mod
    model.linear = model.linear._orig_mod
    return model
1031
+
1032
+
1033
def remove_compile_from_dict(state_dict):
    """Strip torch.compile's "._orig_mod." infix from every state-dict key.

    Mutates ``state_dict`` in place and returns it, so checkpoints saved
    from a compiled model can be loaded into an uncompiled one.
    """
    for old_key in list(state_dict):
        state_dict[old_key.replace("._orig_mod.", ".")] = state_dict.pop(old_key)
    return state_dict
1038
+
1039
+
1040
def add_text_to_ax(
    chars_list,
    ax,
    font_to_use="DejaVu Sans Mono",
    fontsize=21,
    prefix="char",
    plot_boxes=True,
    plot_text=True,
    box_annotations=None,
):
    """Draw character/word glyphs and/or their bounding boxes onto ``ax``.

    Each dict in ``chars_list`` provides ``<prefix>_xmin/xmax/ymin/ymax``,
    ``<prefix>_x_center/_y_center`` and the text under key ``prefix``.
    Optional ``box_annotations`` (one per box) are drawn under each box at
    two-thirds of ``fontsize``.  Returns None.
    """
    if not plot_boxes and not plot_text:
        return None
    text_font = FontProperties(family=font_to_use, style="normal", size=fontsize)
    annot_font = FontProperties(family=font_to_use, style="normal", size=fontsize / 1.5)
    pairs = (
        ((item, None) for item in chars_list)
        if box_annotations is None
        else zip(chars_list, box_annotations)
    )
    for item, annotation in pairs:
        left, bottom = item[f"{prefix}_xmin"], item[f"{prefix}_ymin"]
        width = item[f"{prefix}_xmax"] - item[f"{prefix}_xmin"]
        height = item[f"{prefix}_ymax"] - item[f"{prefix}_ymin"]
        if plot_text:
            ax.text(
                item[f"{prefix}_x_center"],
                item[f"{prefix}_y_center"],
                item[prefix],
                horizontalalignment="center",
                verticalalignment="center",
                fontproperties=text_font,
            )
        if plot_boxes:
            ax.add_patch(Rectangle((left, bottom), width, height, edgecolor="grey", facecolor="none", lw=0.8, alpha=0.4))
            if box_annotations is not None:
                ax.annotate(
                    str(annotation),
                    (left + width / 2, bottom),
                    horizontalalignment="center",
                    verticalalignment="center",
                    fontproperties=annot_font,
                )
1080
+ )
1081
+
1082
+
1083
def plot_fixations_and_text(
    dffix: pd.DataFrame,
    trial: dict,
    plot_prefix="chars_",
    show=False,
    returnfig=False,
    save=False,
    savelocation="plot.png",
    font_to_use="DejaVu Sans Mono",
    fontsize=20,
    plot_classic=True,
    plot_boxes=True,
    plot_text=True,
    fig_size=(14, 8),
    dpi=300,
    turn_axis_on=True,
    algo_choice="slice",
):
    """Plot raw fixations over the stimulus text, optionally with corrections.

    Draws the trial's character/word boxes via add_text_to_ax, the raw
    fixations as black crosses and — when the ``algo_choice`` columns exist —
    the y-corrected fixations as green stars with arrows pointing from the
    corrected position back to the raw one.  Depending on the flags the
    figure is saved, shown, returned, or closed (returning None).
    """
    fig, ax = plt.subplots(1, 1, figsize=fig_size, tight_layout=True, dpi=dpi)
    if f"{plot_prefix}list" in trial.keys():
        add_text_to_ax(
            trial[f"{plot_prefix}list"],
            ax,
            font_to_use,
            fontsize=fontsize,
            prefix=plot_prefix[:-2],  # e.g. "chars_" -> "char"
            plot_boxes=plot_boxes,
            plot_text=plot_text,
        )
    ax.plot(dffix.x, dffix.y, "kX", label="Raw Fixations", alpha=0.9)

    if plot_classic and f"line_num_{algo_choice}" in dffix.columns:
        ax.scatter(
            dffix.x,
            dffix[f"y_{algo_choice}"],
            marker="*",
            color="tab:green",
            label=f"{algo_choice} Prediction",
            alpha=0.9,
        )
        # Arrows run from each corrected fixation to its raw counterpart.
        for x_before, y_before, x_after, y_after in zip(
            dffix.x.values, dffix[f"y_{algo_choice}"].values, dffix.x, dffix.y
        ):
            arr_delta_x = x_after - x_before
            arr_delta_y = y_after - y_before
            ax.arrow(x_before, y_before, arr_delta_x, arr_delta_y, color="tab:green", alpha=0.6)
    ax.set_ylabel("y (pixel)")
    ax.set_xlabel("x (pixel)")

    ax.invert_yaxis()  # screen coordinates: y grows downwards
    ax.legend(bbox_to_anchor=(1, 1), loc="upper left")
    if not turn_axis_on:
        ax.axis("off")
    if save:
        plt.savefig(savelocation, dpi=dpi)
    if show:
        plt.show()
    if returnfig:
        return fig
    else:
        plt.close()
        return None
1145
+
1146
+
1147
def make_folders(gradio_temp_folder, gradio_temp_unzipped_folder, gradio_plots):
    """Ensure the three working directories exist (pathlib.Path args); return 0."""
    for folder in (gradio_temp_folder, gradio_temp_unzipped_folder, gradio_plots):
        folder.mkdir(exist_ok=True)
    return 0
1152
+
1153
+
1154
def get_classic_cfg(fname):
    """Load the classic correction-algorithm configuration from a JSON file.

    Args:
        fname: Path to the JSON config file (e.g. ``algo_cfgs_all.json``).

    Returns:
        dict mapping algorithm names (e.g. "slice") to their config dicts.
    """
    # The original read the whole file and re-assigned two values to
    # themselves; json.load on the handle is equivalent and the no-op
    # statements are dropped.
    with open(fname, "r") as f:
        classic_algos_cfg = json.load(f)
    return classic_algos_cfg
1161
+
1162
+
1163
def find_and_load_model(model_date="20240104-223349"):
    """Locate a model config + checkpoint by its date tag and load the model.

    Args:
        model_date: Timestamp string embedded in the config/checkpoint filenames.

    Returns:
        (model, model_cfg) on success, (None, None) if either the yaml config
        or the checkpoint file cannot be found.
    """
    model_cfg_file = list(DIST_MODELS_FOLDER.glob(f"*{model_date}*.yaml"))
    if len(model_cfg_file) == 0:
        if "logger" in st.session_state:
            st.session_state["logger"].warning(f"No model cfg yaml found for {model_date}")
        return None, None
    model_cfg_file = model_cfg_file[0]
    with open(model_cfg_file) as f:
        model_cfg = yaml.safe_load(f)

    model_cfg["system_type"] = "linux"
    # BUG FIX: the original indexed [0] on the glob result without checking,
    # raising IndexError when the checkpoint is missing; now fail softly like
    # the missing-yaml path above.
    model_files = list(pl.Path("models").glob(f"*{model_date}*.ckpt"))
    if len(model_files) == 0:
        if "logger" in st.session_state:
            st.session_state["logger"].warning(f"No model checkpoint found for {model_date}")
        return None, None
    model = load_model(model_files[0], model_cfg)

    return model, model_cfg
1178
+
1179
+
1180
def load_model(model_file, cfg):
    """Instantiate a LitModel and load weights from a checkpoint file.

    Prefers the hyper-parameter config stored inside the checkpoint
    (``hyper_parameters.cfg``); falls back to the supplied ``cfg``.

    Args:
        model_file: Path to a PyTorch Lightning ``.ckpt`` file.
        cfg: Fallback model configuration dict.

    Returns:
        The model in eval mode with frozen parameters, or None if the
        checkpoint could not be read.
    """
    try:
        # map_location="cpu" so GPU-trained checkpoints load on any machine.
        model_loaded = t.load(model_file, map_location="cpu")
        if "hyper_parameters" in model_loaded.keys():
            model_cfg_temp = model_loaded["hyper_parameters"]["cfg"]
        else:
            model_cfg_temp = cfg
        model_state_dict = model_loaded["state_dict"]
    except Exception as e:
        if "logger" in st.session_state:
            st.session_state["logger"].warning(e)
        if "logger" in st.session_state:
            st.session_state["logger"].warning(f"Failed to load {model_file}")
        return None
    model = LitModel(
        [1, 500, 3],
        model_cfg_temp["hidden_dim_bert"],
        model_cfg_temp["num_attention_heads"],
        model_cfg_temp["n_layers_BERT"],
        model_cfg_temp["loss_function"],
        1e-4,
        model_cfg_temp["weight_decay"],
        model_cfg_temp,
        model_cfg_temp["use_lr_warmup"],
        model_cfg_temp["use_reduce_on_plateau"],
        track_gradient_histogram=model_cfg_temp["track_gradient_histogram"],
        register_forw_hook=model_cfg_temp["track_activations_via_hook"],
        char_dims=model_cfg_temp["char_dims"],
    )
    # Checkpoints saved from a torch.compile()d model carry "_orig_mod."
    # style prefixes; strip them from both module and state dict before
    # loading. NOTE(review): exact prefix handling lives in the helpers.
    model = remove_compile_from_model(model)
    model_state_dict = remove_compile_from_dict(model_state_dict)
    with t.no_grad():
        # strict=False tolerates key mismatches left over after prefix
        # stripping rather than raising.
        model.load_state_dict(model_state_dict, strict=False)
    model.eval()
    model.freeze()
    return model
1216
+
1217
+
1218
def set_up_models(dist_models_folder):
    """Load all DIST models from disk and build the averaging ensemble.

    Scans ``dist_models_folder`` for checkpoints trained with and without
    line-height/width normalisation (encoded in the filenames) and returns a
    dict containing the ensemble, one representative config per flavour, and
    a single DIST model for standalone use.
    """
    out_dict = {}
    if "logger" in st.session_state:
        st.session_state["logger"].info("Loading Ensemble")
    # Checkpoint filenames encode whether coordinate normalisation was used
    # during training.
    dist_models_with_norm = list(dist_models_folder.glob("*normalize_by_line_height_and_width_True*.ckpt"))
    dist_models_without_norm = list(dist_models_folder.glob("*normalize_by_line_height_and_width_False*.ckpt"))
    # The model date tag is the second "_"-separated token of the stem.
    DIST_MODEL_DATE_WITH_NORM = dist_models_with_norm[0].stem.split("_")[1]

    # Each element is a (model, cfg) tuple; either half may be None on failure.
    models_without_norm_df = [find_and_load_model(m_file.stem.split("_")[1]) for m_file in dist_models_without_norm]
    models_with_norm_df = [find_and_load_model(m_file.stem.split("_")[1]) for m_file in dist_models_with_norm]

    # Keep the first successfully loaded config of each flavour.
    model_cfg_without_norm_df = [x[1] for x in models_without_norm_df if x[1] is not None][0]
    model_cfg_with_norm_df = [x[1] for x in models_with_norm_df if x[1] is not None][0]

    # Drop failed loads, keeping only the model objects.
    models_without_norm_df = [x[0] for x in models_without_norm_df if x[0] is not None]
    models_with_norm_df = [x[0] for x in models_with_norm_df if x[0] is not None]

    ensemble_model_avg = EnsembleModel(
        models_without_norm_df, models_with_norm_df, learning_rate=0.0058, use_simple_average=True
    )
    out_dict["ensemble_model_avg"] = ensemble_model_avg

    out_dict["model_cfg_without_norm_df"] = model_cfg_without_norm_df
    out_dict["model_cfg_with_norm_df"] = model_cfg_with_norm_df

    single_DIST_model, single_DIST_model_cfg = find_and_load_model(model_date=DIST_MODEL_DATE_WITH_NORM)
    out_dict["DIST_MODEL_DATE_WITH_NORM"] = DIST_MODEL_DATE_WITH_NORM
    out_dict["single_DIST_model"] = single_DIST_model
    out_dict["single_DIST_model_cfg"] = single_DIST_model_cfg
    return out_dict
1248
+
1249
+
1250
def prep_data_for_dist(model_cfg, dffix, trial=None):
    """Build a single-sample DataLoader for the DIST model from fixation data.

    Converts the fixation dataframe into a float32 feature tensor, applies the
    normalisations requested by ``model_cfg``, renders the stimulus/fixation
    image consumed by the model's conv branch, and wraps everything in a
    DSet/DataLoader pair.

    Args:
        model_cfg: Model configuration dict (sample columns, normalisation
            flags, training-set means/stds, image settings).
        dffix: Fixation dataframe, or a ``{"value": df}`` wrapper.
        trial: Trial dict; defaults to the one in streamlit session state.

    Returns:
        (val_loader, val_set) where the loader has batch_size 1.
    """
    if "logger" in st.session_state:
        st.session_state["logger"].debug("prep_data_for_dist entered")
    if trial is None:
        trial = st.session_state["trial"]
    # UI components may hand the dataframe over wrapped in {"value": df}.
    if isinstance(dffix, dict):
        dffix = dffix["value"]
    sample_tensor = t.tensor(dffix.loc[:, model_cfg["sample_cols"]].to_numpy(), dtype=t.float32)

    if model_cfg["add_line_overlap_feature"]:
        sample_tensor = add_line_overlaps_to_sample(trial, sample_tensor)

    has_nans = t.any(t.isnan(sample_tensor))
    assert not has_nans, "NaNs found in sample tensor"
    samplelist_eval = [sample_tensor]
    trialslist_eval = [trial]
    chars_center_coords_list_eval = None
    # Optional normalisation 1: shift coordinates by the minimum letter x/y.
    if model_cfg["norm_coords_by_letter_min_x_y"]:
        for sample_idx, _ in enumerate(samplelist_eval):
            trialslist_eval, samplelist_eval, chars_center_coords_list_eval = norm_coords_by_letter_min_x_y(
                sample_idx,
                trialslist_eval,
                samplelist_eval,
                chars_center_coords_list=chars_center_coords_list_eval,
            )

    # Optional normalisation 2: scale by line height/width statistics.
    if model_cfg["normalize_by_line_height_and_width"]:
        meanlist_eval, stdlist_eval = [], []
        for sample_idx, _ in enumerate(samplelist_eval):
            (
                trialslist_eval,
                samplelist_eval,
                meanlist_eval,
                stdlist_eval,
                chars_center_coords_list_eval,
            ) = norm_coords_by_letter_positions(
                sample_idx,
                trialslist_eval,
                samplelist_eval,
                meanlist_eval,
                stdlist_eval,
                return_mean_std_lists=True,
                norm_by_char_averages=model_cfg["norm_by_char_averages"],
                chars_center_coords_list=chars_center_coords_list_eval,
                add_normalised_values_as_features=model_cfg["add_normalised_values_as_features"],
            )
    sample_tensor = samplelist_eval[0]
    # Standardise with the training-set statistics stored in the config.
    sample_means = t.tensor(model_cfg["sample_means"], dtype=t.float32)
    sample_std = t.tensor(model_cfg["sample_std"], dtype=t.float32)
    sample_tensor = (sample_tensor - sample_means) / sample_std
    sample_tensor = sample_tensor.unsqueeze(0)  # add batch dim

    if "logger" in st.session_state:
        st.session_state["logger"].info(f"Using path {trial['plot_file']} for plotting")
    # Render the character/fixation image consumed by the conv input branch.
    plot_text_boxes_fixations(
        fpath=trial["plot_file"],
        dpi=250,
        screen_res=(1024, 768),
        data_dir_sub=None,
        set_font_size=True,
        font_size=4,
        use_words=False,
        save_channel_repeats=False,
        save_combo_grey_and_rgb=False,
        dffix=dffix,
        trial=trial,
    )

    val_set = DSet(
        sample_tensor,
        None,  # no targets at inference time
        t.zeros((1, sample_tensor.shape[1])),
        trialslist_eval,
        padding_list=[0],
        padding_at_end=model_cfg["padding_at_end"],
        return_images_for_conv=True,
        im_partial_string=model_cfg["im_partial_string"],
        input_im_shape=model_cfg["char_plot_shape"],
    )
    val_loader = dl(val_set, batch_size=1, shuffle=False, num_workers=0)
    return val_loader, val_set
1331
+
1332
+
1333
def fold_in_seq_dim(out, y=None):
    """Collapse the batch and sequence dims of ``out`` (and optionally ``y``).

    Args:
        out: Tensor of shape (batch, seq, classes).
        y: Optional target tensor of shape (batch, seq) or (batch, seq, c).

    Returns:
        (out, y) with ``out`` reshaped to (batch*seq, classes) and ``y``
        reshaped to (batch*seq,) or (batch*seq, c); ``y`` is None if omitted.
    """
    batch_size, seq_len, num_classes = out.shape

    # Native reshape replaces the einops rearrange calls: the flattening
    # "b s c -> (b s) c" is exactly a row-major reshape.
    out = out.reshape(batch_size * seq_len, num_classes)
    if y is None:
        return out, None
    if len(y.shape) > 2:
        y = y.reshape(batch_size * seq_len, y.shape[-1])
    else:
        y = y.reshape(batch_size * seq_len)
    return out, y
1344
+
1345
+
1346
def logits_to_pred(out, y=None):
    """Convert CORN ordinal logits to per-fixation line-number predictions.

    Args:
        out: Logits of shape (batch, seq, classes).
        y: Optional targets, reshaped alongside the predictions.

    Returns:
        (preds, y) where preds has shape (batch, seq); y is None if omitted.
    """
    seq_len = out.shape[1]
    out, y = fold_in_seq_dim(out, y)
    preds = corn_label_from_logits(out)
    # Unfold back to (batch, seq); reshape is equivalent to the previous
    # einops rearrange "(b s) -> b s".
    preds = preds.reshape(-1, seq_len)
    if y is not None:
        # The no-op "y = y" from the original is dropped.
        y = y.squeeze().reshape(-1, seq_len)
    return preds, y
1355
+
1356
+
1357
def get_DIST_preds(dffix, trial, models_dict=None):
    """Run the single DIST model on a trial and add its predictions to dffix.

    Adds three columns: ``line_num_DIST`` (predicted line indices),
    ``y_DIST`` (predicted y coordinate, snapped to line centers) and
    ``y_DIST_correction`` (y_DIST - raw y). On model failure the dataframe
    is returned unchanged (a warning is logged).

    Args:
        dffix: Fixation dataframe with at least x/y columns.
        trial: Trial dict (character info, line centers, trial_id, ...).
        models_dict: Optional dict holding the model and its config; falls
            back to streamlit session state when None.
    """
    algo_choice = "DIST"

    if models_dict is None:
        # Lazily (re)load the model into session state if it is missing.
        if st.session_state["single_DIST_model"] is None or st.session_state["single_DIST_model_cfg"] is None:
            st.session_state["single_DIST_model"], st.session_state["single_DIST_model_cfg"] = find_and_load_model(
                model_date=st.session_state["DIST_MODEL_DATE_WITH_NORM"]
            )

            if "logger" in st.session_state:
                st.session_state["logger"].info("Model is None, reiniting model")
        else:
            model = st.session_state["single_DIST_model"]
            loader, dset = prep_data_for_dist(st.session_state["single_DIST_model_cfg"], dffix, trial)
    else:
        model = models_dict["single_DIST_model"]
        loader, dset = prep_data_for_dist(models_dict["single_DIST_model_cfg"], dffix, trial)
    batch = next(iter(loader))  # single-sample loader: one batch only

    if "cpu" not in str(model.device):
        batch = [x.cuda() for x in batch]
    try:
        out = model(batch)
        preds, y = logits_to_pred(out, y=None)
        if "logger" in st.session_state:
            st.session_state["logger"].debug(
                f"y_char_unique are {trial['y_char_unique']} for trial {trial['trial_id']}"
            )
        if "logger" in st.session_state:
            st.session_state["logger"].debug(f"trial keys are {trial.keys()} for trial {trial['trial_id']}")
        if "logger" in st.session_state:
            st.session_state["logger"].debug(
                f"chars_list has len {len(trial['chars_list'])} for trial {trial['trial_id']}"
            )
        if "logger" in st.session_state:
            st.session_state["logger"].debug(f"y_char_unique {trial['y_char_unique']} for trial {trial['trial_id']}")
        # Fall back to recomputing the line centers from the char boxes if
        # the trial does not carry them.
        if len(trial["y_char_unique"]) < 1:
            y_char_unique = pd.DataFrame(trial["chars_list"]).char_y_center.sort_values().unique()
        else:
            y_char_unique = trial["y_char_unique"]
        num_lines = trial["num_char_lines"] - 1
        # Clamp line indices into the valid range before mapping to y values.
        preds = t.clamp(preds, 0, num_lines).squeeze().cpu().numpy()
        y_pred_DIST = [y_char_unique[idx] for idx in preds]

        dffix[f"line_num_{algo_choice}"] = preds
        dffix[f"y_{algo_choice}"] = np.round(y_pred_DIST, decimals=1)
        dffix[f"y_{algo_choice}_correction"] = (dffix.loc[:, f"y_{algo_choice}"] - dffix.loc[:, "y"]).round(1)
    except Exception as e:
        if "logger" in st.session_state:
            st.session_state["logger"].warning(f"Exception on model(batch) for DIST \n{e}")
    return dffix
1408
+
1409
+
1410
def get_DIST_ensemble_preds(
    dffix,
    trial,
    model_cfg_without_norm_df,
    model_cfg_with_norm_df,
    ensemble_model_avg,
):
    """Run the DIST ensemble (avg of norm/non-norm models) on one trial.

    Prepares one batch per normalisation flavour, feeds both to the ensemble,
    and writes ``line_num_DIST-Ensemble``, ``y_DIST-Ensemble`` and
    ``y_DIST-Ensemble_correction`` columns into ``dffix``.

    Returns:
        The dataframe with prediction columns added.
    """
    algo_choice = "DIST-Ensemble"
    # The two model flavours expect differently normalised inputs.
    loader_without_norm, dset_without_norm = prep_data_for_dist(model_cfg_without_norm_df, dffix, trial)
    loader_with_norm, dset_with_norm = prep_data_for_dist(model_cfg_with_norm_df, dffix, trial)
    batch_without_norm = next(iter(loader_without_norm))
    batch_with_norm = next(iter(loader_with_norm))
    out = ensemble_model_avg((batch_without_norm, batch_with_norm))
    preds, y = logits_to_pred(out[0]["out_avg"], y=None)
    # Fall back to recomputing line centers from char boxes if missing.
    if len(trial["y_char_unique"]) < 1:
        y_char_unique = pd.DataFrame(trial["chars_list"]).char_y_center.sort_values().unique()
    else:
        y_char_unique = trial["y_char_unique"]
    num_lines = trial["num_char_lines"] - 1
    # Clamp line indices into the valid range before mapping to y values.
    preds = t.clamp(preds, 0, num_lines).squeeze().cpu().numpy()
    if "logger" in st.session_state:
        st.session_state["logger"].debug(f"preds are {preds} for trial {trial['trial_id']}")
    y_pred_DIST = [y_char_unique[idx] for idx in preds]

    dffix[f"line_num_{algo_choice}"] = preds
    dffix[f"y_{algo_choice}"] = np.round(y_pred_DIST, decimals=1)
    dffix[f"y_{algo_choice}_correction"] = (dffix.loc[:, f"y_{algo_choice}"] - dffix.loc[:, "y"]).round(1)
    return dffix
1438
+
1439
+
1440
def get_EDIST_preds_with_model_check(dffix, trial, ensemble_model_avg=None, models_dict=None):
    """Run ensemble DIST predictions, (re)building the ensemble if needed.

    When ``models_dict`` is provided its models/configs are used directly.
    Otherwise the ensemble is taken from streamlit session state, or rebuilt
    from the checkpoints in DIST_MODELS_FOLDER if absent.

    NOTE(review): if ``ensemble_model_avg`` is passed but session state holds
    no cached configs, the else-branch still reads them from session state —
    confirm callers always populate session state in that case.
    """
    if models_dict is None:
        if ensemble_model_avg is None and "ensemble_model_avg" not in st.session_state:
            if "logger" in st.session_state:
                st.session_state["logger"].info("Ensemble Model is None, reiniting model")
            dist_models_with_norm = DIST_MODELS_FOLDER.glob("*normalize_by_line_height_and_width_True*.ckpt")
            dist_models_without_norm = DIST_MODELS_FOLDER.glob("*normalize_by_line_height_and_width_False*.ckpt")

            # Each element is a (model, cfg) tuple; either half may be None.
            models_without_norm_df = [
                find_and_load_model(m_file.stem.split("_")[1]) for m_file in dist_models_without_norm
            ]
            models_with_norm_df = [find_and_load_model(m_file.stem.split("_")[1]) for m_file in dist_models_with_norm]

            model_cfg_without_norm_df = [x[1] for x in models_without_norm_df if x[1] is not None][0]
            model_cfg_with_norm_df = [x[1] for x in models_with_norm_df if x[1] is not None][0]

            models_without_norm_df = [x[0] for x in models_without_norm_df if x[0] is not None]
            models_with_norm_df = [x[0] for x in models_with_norm_df if x[0] is not None]

            ensemble_model_avg = EnsembleModel(
                models_without_norm_df, models_with_norm_df, learning_rate=0.0, use_simple_average=True
            )
            # Cache for subsequent calls in this session.
            st.session_state["ensemble_model_avg"] = ensemble_model_avg
            st.session_state["model_cfg_without_norm_df"] = model_cfg_without_norm_df
            st.session_state["model_cfg_with_norm_df"] = model_cfg_with_norm_df
        else:
            model_cfg_without_norm_df = st.session_state["model_cfg_without_norm_df"]
            model_cfg_with_norm_df = st.session_state["model_cfg_with_norm_df"]
            ensemble_model_avg = st.session_state["ensemble_model_avg"]
        dffix = get_DIST_ensemble_preds(
            dffix,
            trial,
            st.session_state["model_cfg_without_norm_df"],
            st.session_state["model_cfg_with_norm_df"],
            st.session_state["ensemble_model_avg"],
        )
    else:
        dffix = get_DIST_ensemble_preds(
            dffix,
            trial,
            models_dict["model_cfg_without_norm_df"],
            models_dict["model_cfg_with_norm_df"],
            models_dict["ensemble_model_avg"],
        )
    return dffix
1486
+
1487
+
1488
def correct_df(
    dffix,
    algo_choice,
    trial=None,
    for_multi=False,
    ensemble_model_avg=None,
    is_outside_of_streamlit=False,
    classic_algos_cfg=None,
    models_dict=None,
):
    """Apply one or several line-assignment correction algorithms to dffix.

    Args:
        dffix: Fixation dataframe or ``{"value": df}`` wrapper; needs x/y columns.
        algo_choice: Algorithm name or list of names ("DIST", "DIST-Ensemble",
            Wisdom-of-Crowds variants, or any classic algorithm key).
        trial: Trial dict; taken from session state when None (single-trial mode).
        for_multi: When True, return only the dataframe (multi-trial pipeline).
        ensemble_model_avg: Optional pre-built ensemble model.
        is_outside_of_streamlit: Use plain tqdm instead of stqdm.
        classic_algos_cfg: Configs for the classic algorithms; session state
            fallback when None.
        models_dict: Optional dict of preloaded DIST models/configs.

    Returns:
        ``dffix`` if ``for_multi`` else ``(dffix, csv_export)``.
    """
    if is_outside_of_streamlit:
        stqdm = tqdm
    else:
        from stqdm import stqdm
    if classic_algos_cfg is None:
        classic_algos_cfg = st.session_state["classic_algos_cfg"]
    if trial is None and not for_multi:
        trial = st.session_state["trial"]
    if "logger" in st.session_state:
        st.session_state["logger"].info(f"Applying {algo_choice} to fixations for trial {trial['trial_id']}")

    if isinstance(dffix, dict):
        dffix = dffix["value"]
    # BUG FIX: the original tested "x" twice and never checked for "y".
    if "x" not in dffix.keys() or "y" not in dffix.keys():
        if "logger" in st.session_state:
            st.session_state["logger"].warning(f"x or y not in dffix")
        if "logger" in st.session_state:
            st.session_state["logger"].warning(dffix.columns)
        return dffix
    if isinstance(algo_choice, list):
        algo_choices = algo_choice
        repeats = range(len(algo_choice))
    else:
        algo_choices = [algo_choice]
        repeats = range(1)
    for algoIdx in stqdm(repeats, desc="Applying correction algorithms"):
        algo_choice = algo_choices[algoIdx]
        # Track both CPU and wall time per algorithm for the log.
        st_proc = time.process_time()
        st_wall = time.time()

        if algo_choice == "DIST":
            dffix = get_DIST_preds(dffix, trial, models_dict=models_dict)

        elif algo_choice == "DIST-Ensemble":
            dffix = get_EDIST_preds_with_model_check(dffix, trial, models_dict=models_dict)
        elif algo_choice == "Wisdom_of_Crowds_with_DIST":
            dffix, corrections = get_all_classic_preds(dffix, trial, classic_algos_cfg)
            dffix = get_DIST_preds(dffix, trial, models_dict=models_dict)
            # Triple-weight the DIST prediction in the vote.
            for _ in range(3):
                corrections.append(np.asarray(dffix.loc[:, "y_DIST"]))
            dffix = apply_woc(dffix, trial, corrections, algo_choice)
        elif algo_choice == "Wisdom_of_Crowds_with_DIST_Ensemble":
            dffix, corrections = get_all_classic_preds(dffix, trial, classic_algos_cfg)
            dffix = get_EDIST_preds_with_model_check(dffix, trial, ensemble_model_avg, models_dict=models_dict)
            # Triple-weight the ensemble prediction in the vote.
            for _ in range(3):
                corrections.append(np.asarray(dffix.loc[:, "y_DIST-Ensemble"]))
            dffix = apply_woc(dffix, trial, corrections, algo_choice)
        elif algo_choice == "Wisdom_of_Crowds":
            dffix, corrections = get_all_classic_preds(dffix, trial, classic_algos_cfg)
            dffix = apply_woc(dffix, trial, corrections, algo_choice)

        else:
            # Any other key is a classic algorithm configured in the cfg dict.
            algo_cfg = classic_algos_cfg[algo_choice]
            dffix = calgo.apply_classic_algo(dffix, trial, algo_choice, algo_cfg)
            dffix[f"y_{algo_choice}_correction"] = (dffix.loc[:, f"y_{algo_choice}"] - dffix.loc[:, "y"]).round(1)

        et_proc = time.process_time()
        time_proc = et_proc - st_proc
        et_wall = time.time()
        time_wall = et_wall - st_wall
        if "logger" in st.session_state:
            st.session_state["logger"].info(f"time_proc {algo_choice} {time_proc}")
        if "logger" in st.session_state:
            st.session_state["logger"].info(f"time_wall {algo_choice} {time_wall}")
    if for_multi:
        return dffix
    else:
        if "start_time" in dffix.columns:
            dffix = dffix.drop(axis=1, labels=["start_time", "end_time"])
        return dffix, export_csv(dffix, trial)
1568
+
1569
def set_font_from_chars_list(trial):
    """Estimate the stimulus font size (points) from character box spacing.

    Uses the smallest gap between distinct character-line y-centers as the
    line height, converts it to points, and rounds to the nearest 0.25 pt.

    Args:
        trial: Trial dict; uses ``trial["chars_list"]`` when present.

    Returns:
        Font size in points, rounded to a quarter point.
    """
    if "chars_list" in trial:
        chars_df = pd.DataFrame(trial["chars_list"])
        line_diffs = np.diff(chars_df.char_y_center.unique())
        y_diffs = np.unique(line_diffs)
        if len(y_diffs) == 1:
            y_diff = y_diffs[0]
        elif len(y_diffs) == 0:
            # BUG FIX: single-line stimuli produce an empty diff array and
            # np.min([]) raised ValueError; fall back to the default height.
            y_diff = 1 / 0.333 * 18
        else:
            y_diff = np.min(y_diffs)
        y_diff = round(y_diff * 2) / 2  # snap to half pixels

    else:
        y_diff = 1 / 0.333 * 18  # default line height when no chars available
    font_size = y_diff * 0.333  # pixel to point conversion
    return round((font_size) * 4, ndigits=0) / 4
1585
+
1586
def get_font_and_font_size_from_trial(trial):
    """Resolve the font face and size for a trial.

    Asks ``get_plot_props`` first; when it cannot determine a size, falls
    back to the size stored on the trial, then to an estimate derived from
    the character boxes.
    """
    font_face, font_size, _dpi, _screen_res = get_plot_props(trial, AVAILABLE_FONTS)

    if font_size is None:
        if "font_size" in trial:
            font_size = trial["font_size"]
        else:
            font_size = set_font_from_chars_list(trial)
    return font_face, font_size
1594
+
1595
+
1596
def sigmoid(x):
    """Logistic function: maps any real input (scalar or array) into (0, 1)."""
    negative_exp = np.exp(-x)
    return 1.0 / (1.0 + negative_exp)
1598
+
1599
+
1600
def matplotlib_plot_df(
    dffix,
    trial,
    algo_choice,
    stimulus_prefix="word",
    desired_dpi=300,
    fix_to_plot=None,
    stim_info_to_plot=None,
    box_annotations=None,
):
    """Render stimulus text/boxes and (optionally) fixation traces with matplotlib.

    Args:
        dffix: Fixation dataframe with x/y and optional ``y_{algo}`` columns.
        trial: Trial dict (chars_list, words_list, display_coords, font info).
        algo_choice: Algorithm name or list of names whose corrected y to draw.
        stimulus_prefix: Which stimulus element list to draw boxes for.
        desired_dpi: Figure DPI; figure size is derived from the screen pixels.
        fix_to_plot: Subset of {"Uncorrected Fixations", "Corrected Fixations"}.
        stim_info_to_plot: Subset of {"Words", "Word boxes"}.
        box_annotations: Optional per-box annotation strings.

    Returns:
        (fig, width_px, height_px) with the y axis inverted to screen coords.
    """
    # Avoid mutable default arguments; None means "use the defaults".
    if fix_to_plot is None:
        fix_to_plot = []
    if stim_info_to_plot is None:
        stim_info_to_plot = ["Words", "Word boxes"]
    chars_df = pd.DataFrame(trial["chars_list"]) if "chars_list" in trial else None

    if chars_df is not None:
        font_face, font_size = get_font_and_font_size_from_trial(trial)
        font_size = font_size * 0.65
    else:
        st.warning("No character or word information available to plot")

    if "display_coords" in trial:
        desired_width_in_pixels = trial["display_coords"][2] + 1
        desired_height_in_pixels = trial["display_coords"][3] + 1
    else:
        desired_width_in_pixels = 1920
        desired_height_in_pixels = 1080

    figure_width = desired_width_in_pixels / desired_dpi
    figure_height = desired_height_in_pixels / desired_dpi

    fig = plt.figure(figsize=(figure_width, figure_height), dpi=desired_dpi)
    ax = fig.add_subplot(1, 1, 1)
    # Make the axes fill the whole figure so pixels map 1:1 onto data coords.
    fig.subplots_adjust(bottom=0)
    fig.subplots_adjust(top=1)
    fig.subplots_adjust(right=1)
    fig.subplots_adjust(left=0)
    if "font" in trial and trial["font"] in AVAILABLE_FONTS:
        font_to_use = trial["font"]
    else:
        font_to_use = "DejaVu Sans Mono"
    if "font_size" in trial:
        font_size = trial["font_size"]
    else:
        font_size = 20

    if f"{stimulus_prefix}s_list" in trial:
        add_text_to_ax(
            trial[f"{stimulus_prefix}s_list"],
            ax,
            font_to_use,
            prefix=stimulus_prefix,
            fontsize=font_size / 3.89,
            plot_text=False,
            plot_boxes=True if "Word boxes" in stim_info_to_plot else False,
            box_annotations=box_annotations,
        )

    if "chars_list" in trial:
        add_text_to_ax(
            trial["chars_list"],
            ax,
            font_to_use,
            prefix="char",
            fontsize=font_size / 3.89,
            plot_text=True if "Words" in stim_info_to_plot else False,
            plot_boxes=False,
            box_annotations=None,
        )

    if "Uncorrected Fixations" in fix_to_plot:
        ax.plot(dffix.x, dffix.y, label="Raw fixations", color="blue", alpha=0.6, linewidth=0.6)

        # Arrow heads from each fixation towards its successor.
        x0 = dffix.x.iloc[range(len(dffix.x) - 1)].values
        x1 = dffix.x.iloc[range(1, len(dffix.x))].values
        y0 = dffix.y.iloc[range(len(dffix.y) - 1)].values
        y1 = dffix.y.iloc[range(1, len(dffix.y))].values
        xpos = x0
        ypos = y0
        xdir = x1 - x0
        ydir = y1 - y0
        for X, Y, dX, dY in zip(xpos, ypos, xdir, ydir):
            ax.annotate(
                "",
                xytext=(X, Y),
                xy=(X + 0.001 * dX, Y + 0.001 * dY),
                arrowprops=dict(arrowstyle="fancy", color="blue"),
                size=8,
                alpha=0.3,
            )
    if "Corrected Fixations" in fix_to_plot:
        if isinstance(algo_choice, list):
            algo_choices = algo_choice
            repeats = range(len(algo_choice))
        else:
            algo_choices = [algo_choice]
            repeats = range(1)
        for algoIdx in repeats:
            algo_choice = algo_choices[algoIdx]
            if f"y_{algo_choice}" in dffix.columns:
                ax.plot(
                    dffix.x,
                    dffix.loc[:, f"y_{algo_choice}"],
                    # BUG FIX: this trace was mislabelled "Raw fixations";
                    # label it by the algorithm like the plotly version does.
                    label=f"{algo_choice} corrected",
                    color=COLORS[algoIdx],
                    alpha=0.6,
                    linewidth=0.6,
                )

                x0 = dffix.x.iloc[range(len(dffix.x) - 1)].values
                x1 = dffix.x.iloc[range(1, len(dffix.x))].values
                y0 = dffix.loc[:, f"y_{algo_choice}"].iloc[range(len(dffix.loc[:, f"y_{algo_choice}"]) - 1)].values
                y1 = dffix.loc[:, f"y_{algo_choice}"].iloc[range(1, len(dffix.loc[:, f"y_{algo_choice}"]))].values
                xpos = x0
                ypos = y0
                xdir = x1 - x0
                ydir = y1 - y0
                for X, Y, dX, dY in zip(xpos, ypos, xdir, ydir):
                    ax.annotate(
                        "",
                        xytext=(X, Y),
                        xy=(X + 0.001 * dX, Y + 0.001 * dY),
                        arrowprops=dict(arrowstyle="fancy", color=COLORS[algoIdx]),
                        size=8,
                        alpha=0.3,
                    )

    ax.set_xlim((0, desired_width_in_pixels))
    ax.set_ylim((0, desired_height_in_pixels))
    ax.invert_yaxis()  # screen coordinates: y grows downwards

    return fig, desired_width_in_pixels, desired_height_in_pixels
1729
+
1730
+
1731
def plotly_plot_with_image(
    dffix,
    trial,
    algo_choice,
    to_plot_list=["Uncorrected Fixations", "Words", "corrected fixations", "Word boxes"],
    scale_factor=0.5,
):
    """Build an interactive plotly figure with the stimulus as a background image.

    Renders the stimulus via ``matplotlib_plot_df`` to a temp PNG, embeds it
    as a layout image, then overlays raw and/or corrected fixation traces.

    Args:
        dffix: Fixation dataframe (x, y, duration, optional y_{algo} columns).
        trial: Trial dict for the stimulus rendering.
        algo_choice: Algorithm name or list of names for corrected overlays.
        to_plot_list: Which elements to include.
        scale_factor: Uniform scale applied to pixel coordinates and sizes.

    Returns:
        The plotly Figure.
    """
    # Render the stimulus only (no fixations) to an image for the background.
    fig, img_width, img_height = matplotlib_plot_df(
        dffix, trial, algo_choice, desired_dpi=300, fix_to_plot=[], stim_info_to_plot=to_plot_list
    )
    fig.savefig(TEMP_FIGURE_STIMULUS_PATH)
    fig = go.Figure()
    # Invisible scatter pins the axis ranges so the image is not distorted.
    fig.add_trace(
        go.Scatter(
            x=[0, img_width * scale_factor],
            y=[img_height * scale_factor, 0],
            mode="markers",
            marker_opacity=0,
            name="scale_helper",
        )
    )

    fig.update_xaxes(visible=False, range=[0, img_width * scale_factor])

    fig.update_yaxes(
        visible=False,
        range=[img_height * scale_factor, 0],  # inverted: screen coordinates
        scaleanchor="x",
    )
    if "Words" in to_plot_list or "Word boxes" in to_plot_list:
        imsource = Image.open(str(TEMP_FIGURE_STIMULUS_PATH))
        fig.add_layout_image(
            dict(
                x=0,
                sizex=img_width * scale_factor,
                y=0,
                sizey=img_height * scale_factor,
                xref="x",
                yref="y",
                opacity=1.0,
                layer="below",
                sizing="stretch",
                source=imsource,
            )
        )

    if "Uncorrected Fixations" in to_plot_list:
        # Marker size encodes fixation duration, squashed through a sigmoid
        # so outliers do not dominate.
        duration_scaled = dffix.duration - dffix.duration.min()
        duration_scaled = ((duration_scaled / duration_scaled.max()) - 0.5) * 3
        duration = sigmoid(duration_scaled) * 50 * scale_factor
        fig.add_trace(
            go.Scatter(
                x=dffix.x * scale_factor,
                y=dffix.y * scale_factor,
                mode="markers+lines+text",
                name="Raw fixations",
                marker=dict(
                    color=COLORS[-1],
                    symbol="arrow",
                    size=duration.values,
                    angleref="previous",
                    line=dict(color="black", width=duration.values / 10),
                ),
                line_width=2 * scale_factor,
                text=np.arange(len(dffix.x)),  # fixation index labels
                textposition="middle right",
                textfont=dict(
                    family="sans serif",
                    size=18 * scale_factor,
                ),
                hoverinfo="text+x+y",
                opacity=0.9,
            )
        )

    if "Corrected Fixations" in to_plot_list:
        if isinstance(algo_choice, list):
            algo_choices = algo_choice
            repeats = range(len(algo_choice))
        else:
            algo_choices = [algo_choice]
            repeats = range(1)
        for algoIdx in repeats:
            algo_choice = algo_choices[algoIdx]
            if f"y_{algo_choice}" in dffix.columns:
                fig.add_trace(
                    go.Scatter(
                        x=dffix.x * scale_factor,
                        y=dffix.loc[:, f"y_{algo_choice}"] * scale_factor,
                        mode="markers",
                        name=f"{algo_choice} corrected",
                        marker_color=COLORS[algoIdx],
                        marker_size=10 * scale_factor,
                        hoverinfo="text+x+y",
                        opacity=0.75,
                    )
                )

    fig.update_layout(
        plot_bgcolor=None,
        width=img_width * scale_factor,
        height=img_height * scale_factor,
        margin={"l": 0, "r": 0, "t": 0, "b": 0},
        legend=dict(orientation="h", yanchor="bottom", y=1.05, xanchor="right", x=0.8),
    )

    # Hide the helper trace from the legend.
    for trace in fig["data"]:
        if trace["name"] == "scale_helper":
            trace["showlegend"] = False
    return fig
1841
+
1842
+
1843
def plot_y_corr(dffix, algo_choice, margin=dict(t=40, l=10, r=10, b=1)):
    """Plot the per-fixation y-correction applied by each algorithm.

    Args:
        dffix: Fixation dataframe (or ``{"value": df}`` wrapper) with
            ``y_{algo}_correction`` columns.
        algo_choice: Algorithm name or list of names to plot.
        margin: Plotly layout margin (read-only; safe as a default).

    Returns:
        A plotly Figure; empty (layout only) when no correction column exists.
    """
    # BUG FIX: the original accessed dffix.x before unwrapping a possible
    # {"value": df} dict (AttributeError) and unwrapped twice; unwrap first.
    if isinstance(dffix, dict):
        dffix = dffix["value"]
    num_datapoints = len(dffix.x)

    layout = dict(
        plot_bgcolor="white",
        autosize=True,
        margin=margin,
        xaxis=dict(
            title="Fixation Index",
            linecolor="black",
            range=[-1, num_datapoints + 1],
            showgrid=False,
            mirror="all",
            showline=True,
        ),
        yaxis=dict(
            title="y correction",
            side="left",
            linecolor="black",
            showgrid=False,
            mirror="all",
            showline=True,
        ),
        legend=dict(orientation="v", yanchor="middle", y=0.95, xanchor="left", x=1.05),
    )
    algo_string = algo_choice[0] if isinstance(algo_choice, list) else algo_choice
    if f"y_{algo_string}_correction" not in dffix.columns:
        st.session_state["logger"].warning("No correction column found in dataframe")
        return go.Figure(layout=layout)

    fig = go.Figure(layout=layout)

    if isinstance(algo_choice, list):
        algo_choices = algo_choice
        repeats = range(len(algo_choice))
    else:
        algo_choices = [algo_choice]
        repeats = range(1)
    for algoIdx in repeats:
        algo_choice = algo_choices[algoIdx]
        fig.add_trace(
            go.Scatter(
                x=np.arange(num_datapoints),
                y=dffix.loc[:, f"y_{algo_choice}_correction"],
                mode="markers",
                name=f"{algo_choice} y correction",
                marker_color=COLORS[algoIdx],
                marker_size=3,
                showlegend=True,
            )
        )
    fig.update_yaxes(zeroline=True, zerolinewidth=1, zerolinecolor="black")

    return fig
1901
+
1902
+
1903
def download_example_ascs(EXAMPLES_FOLDER, EXAMPLES_ASC_ZIP_FILENAME, OSF_DOWNLAOD_LINK, EXAMPLES_FOLDER_PATH):
    """Fetch and unpack the bundled example .asc files.

    Downloads the zip from OSF when it is not present, extracts it when the
    expected four .asc files are missing, and returns the list of .asc paths
    found afterwards.
    """
    if not os.path.isdir(EXAMPLES_FOLDER):
        os.mkdir(EXAMPLES_FOLDER)

    if not os.path.exists(EXAMPLES_ASC_ZIP_FILENAME):
        download_url(OSF_DOWNLAOD_LINK, EXAMPLES_ASC_ZIP_FILENAME)
        # os.system(f'''wget -O {EXAMPLES_ASC_ZIP_FILENAME} -c --read-timeout=5 --tries=0 "{OSF_DOWNLAOD_LINK}"''')

    # Only re-extract when the zip exists and the folder lacks the 4 files.
    if os.path.exists(EXAMPLES_ASC_ZIP_FILENAME) and EXAMPLES_FOLDER_PATH.exists():
        found_asc_files = list(EXAMPLES_FOLDER_PATH.glob("*.asc"))
        if len(found_asc_files) != 4:
            try:
                with zipfile.ZipFile(EXAMPLES_ASC_ZIP_FILENAME, "r") as zip_ref:
                    zip_ref.extractall(EXAMPLES_FOLDER)
            except Exception as e:
                st.session_state["logger"].warning(e)
                st.session_state["logger"].warning(f"Extracting {EXAMPLES_ASC_ZIP_FILENAME} failed")

    return list(EXAMPLES_FOLDER_PATH.glob("*.asc"))
1924
+
1925
+
1926
def process_trial_choice_single_csv(trial, algo_choice, file=None):
    """Prepare a single-CSV trial for correction and optionally correct it.

    Ensures the trial has its fixation dataframe, plot-file path and character
    metadata attached, then runs ``correct_df`` when an algorithm is given.

    Args:
        trial: Trial dict (mutated in place: plot_file, fname, chars_df, ...).
        algo_choice: Algorithm name(s) for ``correct_df``; None skips correction.
        file: Uploaded file object; falls back to session state when None.

    Returns:
        (dffix, trial, dpi, screen_res, font, font_size).
    """
    trial_id = trial["trial_id"]
    if "dffix" in trial:
        dffix = trial["dffix"]
    else:
        if file is None:
            file = st.session_state["single_csv_file"]
        # Path where the channel-separated stimulus image will be stored.
        trial["plot_file"] = str(PLOTS_FOLDER.joinpath(f"{file.name}_{trial_id}_2ndInput_chars_channel_sep.png"))
        trial["fname"] = str(file.name)
        dffix = trial["dffix"] = st.session_state["trials_by_ids_single_csv"][trial_id]["dffix"]

    font, font_size, dpi, screen_res = get_plot_props(trial, AVAILABLE_FONTS)
    chars_df = pd.DataFrame(trial["chars_list"])
    trial["chars_df"] = chars_df.to_dict()
    # Sorted unique line centers; used to snap predictions to lines.
    trial["y_char_unique"] = list(chars_df.char_y_center.sort_values().unique())
    if algo_choice is not None:
        dffix, _ = correct_df(dffix, algo_choice, trial)
    return dffix, trial, dpi, screen_res, font, font_size
1944
+
1945
+
1946
def add_default_font_and_character_props_to_state(trial):
    """Derive default layout properties (line height, text origin, font) for a trial.

    Args:
        trial: Trial dict with a non-empty ``chars_list``.

    Returns:
        (y_diff, x_txt_start, y_txt_start, font_face, font_size, line_height)
        where ``line_height`` equals ``y_diff``, the smallest gap between
        distinct character-line y-centers.
    """
    chars_list = trial["chars_list"]
    chars_df = pd.DataFrame(trial["chars_list"])
    line_diffs = np.diff(chars_df.char_y_center.unique())
    y_diffs = np.unique(line_diffs)
    if len(y_diffs) == 1:
        y_diff = y_diffs[0]
    elif len(y_diffs) == 0:
        # BUG FIX: single-line stimuli yield an empty diff array and
        # np.min([]) raised ValueError; fall back to the same default line
        # height used by set_font_from_chars_list.
        y_diff = 1 / 0.333 * 18
    else:
        y_diff = np.min(y_diffs)
    y_diff = round(y_diff * 2) / 2  # snap to half pixels
    x_txt_start = chars_list[0]["char_xmin"]
    y_txt_start = chars_list[0]["char_y_center"]

    font_face, font_size = get_font_and_font_size_from_trial(trial)

    line_height = y_diff
    return y_diff, x_txt_start, y_txt_start, font_face, font_size, line_height
1963
+
1964
def get_all_measures(trial, dffix, prefix, use_corrected_fixations=True, correction_algo="warp"):
    """Compute all word/character-level reading measures for one trial.

    Runs every analysis function from ``anf`` (fixation counts, durations,
    landing positions/distances, regressions, ...) and joins them into one
    dataframe indexed by stimulus element.

    Args:
        trial: Trial dict passed through to the measure functions.
        dffix: Fixation dataframe.
        prefix: Stimulus element type the measures are computed over
            (e.g. "word" or "char"); also names the index column.
        use_corrected_fixations: When True, replace y with the corrected
            ``y_{correction_algo}`` column on a copy before measuring.
        correction_algo: Which algorithm's corrected y values to use.

    Returns:
        DataFrame with one row per stimulus element: ``{prefix}_num``,
        ``{prefix}`` (the element text) and one column per measure.
    """
    if use_corrected_fixations:
        # Deep copy so the caller's dataframe keeps its raw y values.
        dffix_copy = copy.deepcopy(dffix)
        dffix_copy["y"] = dffix_copy[f"y_{correction_algo}"]
    else:
        dffix_copy = dffix
    # Each helper returns a dataframe carrying f"{prefix}_index" plus its
    # measure column; set the shared index so they can be concatenated.
    initial_landing_position_own_vals = anf.initial_landing_position_own(trial, dffix_copy, prefix).set_index(
        f"{prefix}_index"
    )
    second_pass_duration_own_vals = anf.second_pass_duration_own(trial, dffix_copy, prefix).set_index(f"{prefix}_index")
    number_of_fixations_own_vals = anf.number_of_fixations_own(trial, dffix_copy, prefix).set_index(f"{prefix}_index")
    initial_fixation_duration_own_vals = anf.initial_fixation_duration_own(trial, dffix_copy, prefix).set_index(
        f"{prefix}_index"
    )
    first_of_many_duration_own_vals = anf.first_of_many_duration_own(trial, dffix_copy, prefix).set_index(
        f"{prefix}_index"
    )
    total_fixation_duration_own_vals = anf.total_fixation_duration_own(trial, dffix_copy, prefix).set_index(
        f"{prefix}_index"
    )
    gaze_duration_own_vals = anf.gaze_duration_own(trial, dffix_copy, prefix).set_index(f"{prefix}_index")
    go_past_duration_own_vals = anf.go_past_duration_own(trial, dffix_copy, prefix).set_index(f"{prefix}_index")
    initial_landing_distance_own_vals = anf.initial_landing_distance_own(trial, dffix_copy, prefix).set_index(
        f"{prefix}_index"
    )
    landing_distances_own_vals = anf.landing_distances_own(trial, dffix_copy, prefix).set_index(f"{prefix}_index")
    number_of_regressions_in_own_vals = anf.number_of_regressions_in_own(trial, dffix_copy, prefix).set_index(
        f"{prefix}_index"
    )
    # Drop the duplicated element-text column from each frame before the
    # column-wise concat; it is re-attached once below.
    own_measure_df = pd.concat(
        [
            df.drop(prefix, axis=1)
            for df in [
                number_of_fixations_own_vals,
                initial_fixation_duration_own_vals,
                first_of_many_duration_own_vals,
                total_fixation_duration_own_vals,
                gaze_duration_own_vals,
                go_past_duration_own_vals,
                second_pass_duration_own_vals,
                initial_landing_position_own_vals,
                initial_landing_distance_own_vals,
                landing_distances_own_vals,
                number_of_regressions_in_own_vals,
            ]
        ],
        axis=1,
    )
    own_measure_df[prefix] = number_of_fixations_own_vals[prefix]
    # Move the element text to the front and prepend a running element number.
    first_column = own_measure_df.pop(prefix)
    own_measure_df.insert(0, prefix, first_column)
    own_measure_df.insert(0, f"{prefix}_num", np.arange((own_measure_df.shape[0])))
    return own_measure_df