From 8abefe28148b0dd1fb13a4929c2113a60f85bf70 Mon Sep 17 00:00:00 2001
From: phga <toerd@posteo.de>
Date: Sun, 18 Jul 2021 21:07:15 +0200
Subject: [PATCH] update: results own

---
 chap3/implementation.tex |  6 +--
 chap5/results.tex        | 83 +++++++++++++++++++++++++++++++++++++++-
 glossary.tex             |  6 ++-
 ref_shelf.bib            | 38 ++++++++++++++++++
 4 files changed, 127 insertions(+), 6 deletions(-)

diff --git a/chap3/implementation.tex b/chap3/implementation.tex
index f813e84..5ee9456 100644
--- a/chap3/implementation.tex
+++ b/chap3/implementation.tex
@@ -121,9 +121,6 @@ C  = TL - INF;
 CER = roundToPrecision(IF / (TL + IF), 5);
 UER = roundToPrecision(INF / (TL + IF), 5);
 TER = roundToPrecision((INF + IF)/(TL + IF), 5);
-KSPC = roundToPrecision(ISL / TL, 5);
-// Correct / Any input char
-accuracy = roundToPrecision(C / (TL + IF) * 100, 2);
 
 // Speed metrics
 // TL - 1 because the first char is entered at 0 seconds
@@ -132,6 +129,9 @@ AdjWPM = roundToPrecision(WPM * Math.pow((1 - UER), a), 2);
 KSPS = roundToPrecision((ISL - 1) / TEST_TIME, 5);
 \end{minted}
 \end{listing}
+% // Correct / Any input char
+% accuracy = roundToPrecision(C / (TL + IF) * 100, 2);
+% KSPC = roundToPrecision(ISL / TL, 5);
 
 For further implementation details on how input was captured or sent to the
 backend refer to the code in the online repository \footnote{TODO: GITHUB}.
diff --git a/chap5/results.tex b/chap5/results.tex
index b5e171b..079b22f 100644
--- a/chap5/results.tex
+++ b/chap5/results.tex
@@ -6,5 +6,84 @@
 This section addresses the statistical analysis of the data obtained throughout
 the main, within-subject, user study (n = 24) that consisted of five repeated
 measurements. Because the data was from related, dependent groups, we used
-repeated measurement \gls{ANOVA} if all required assumption were met and
-Friedman's Test otherwise.
+\textit{Repeated Measurement \gls{ANOVA}} if all required assumptions were met
+and \textit{Friedman's Test} otherwise. To identify the specific pairs of
+treatments that differed significantly, we ran either \textit{Dependent T-Tests}
+or \textit{Wilcoxon Signed Rank Tests} (both with \textit{Holm correction
+  (sequentially rejective Bonferroni test)} \cite{holm_correction}) as post-hoc
+tests \cite{field_stats, downey_stats}. The reliability of the two sub-scales
+(hedonic and pragmatic quality) in the \glsfirst{UEQ-S} was estimated using
+\textit{Cronbach's alpha} \cite{tavakol_cronbachs_alpha}. Results are
+considered statistically significant at an $\alpha$-level of 0.05. We used
+95\% confidence intervals in visualizations of certain results. Normality of
+data or residuals was checked using visual assessment of \gls{Q-Q} plots and
+additionally the \textit{Shapiro-Wilk} test \cite{field_stats, downey_stats}.
+
+\subsubsection{Own Keyboard \& Reference Values}
+\label{sec:res_OPC}
+As mentioned in Section \ref{sec:main_design}, the keyboard \textit{Own} was
+used as a reference for some metrics captured during the experiment. Since the
+measurements with \textit{Own} took place at the start (T0\_1) and end (T0\_2)
+of the experiment, we compared the results of both typing tests to detect
+possible variations in performance due to fatigue. Using dependent T-tests, we
+found that there were no significant differences in \glsfirst{KSPS} for T0\_1 (M
+= 5.39, sd = 1.49) compared to T0\_2 (M = 5.47, sd = 1.48, t = -1.53, p =
+0.139), \glsfirst{UER} was overall negligible with T0\_1 (M = 0.005, sd = 0.013,
+85th percentile = 0.0051) and T0\_2 (M = 0.008, sd = 0.028, 85th percentile =
+0.0052) and \glsfirst{WPM} showed a trend to approach significance with T0\_1 (M
+= 54.2, sd = 14.7) compared to T0\_2 (M = 53.0, sd = 14.5, t = 1.92, p =
+0.067). Further, using dependent T-tests we were able to find statistically
+significant differences in \glsfirst{AdjWPM} for T0\_1 (M = 53.9, sd = 14.5) and
+T0\_2 (M = 52.5, sd = 14.3, t = 2.44, p = 0.023), \glsfirst{CER} for T0\_1 (M =
+0.057, sd = 0.028) and T0\_2 (M = 0.078, sd = 0.038, t = -3.54, p = 0.002) and
+\glsfirst{TER} for T0\_1 (M = 0.063, sd = 0.031) and T0\_2 (M = 0.086, sd =
+0.039, t = -4.27, p = 0.0003). Because of the differences, we decided to use the
+means of all metrics gathered for each participant through T0\_1 and T0\_2 as
+the reference values to compute the \textit{\gls{OPC}} for the test keyboards
+(\textit{Athena, Aphrodite, Nyx} and \textit{Hera}).
+
+Additionally, using a dependent T-test, we compared the muscle activity (\% of
+\glsfirst{MVC}) and found that there are significant differences in left flexor
+(\glsfirst{FDP} \& \glsfirst{FDS}) \%\gls{MVC} for T0\_1 (M = 12.0, sd = 8.27)
+and T0\_2 (M = 8.53, sd = 7.16, t = 3.18, p = 0.004). Residuals of right flexor
+(\gls{FDP} \& \gls{FDS}) were not normally distributed, therefore we used the
+Wilcoxon Signed Rank Test and found a significant difference for T0\_1 (M =
+10.8, sd = 8.18, Med = 9.52) and T0\_2 (M = 7.71, sd = 6.08, Med = 5.32, p =
+0.021). It has to be noted that we had to remove two erroneous measurements for
+the right flexor (n = 22). No significant differences have been found in left or
+right extensor (\glsfirst{ED}) \%\gls{MVC} between T0\_1 and T0\_2.
+
+\begin{table}[ht]
+  \centering
+  \ra{1.3}
+  \begin{tabular}{?l^l^l^l^l^l^l^l}
+    \toprule
+    \rowstyle{\itshape}
+    Y      & Comparison    & Statistic & p      & Estimate & CI             & Method & Alternative \\
+    \midrule
+    WPM    & T0\_1 - T0\_2 & 1.92      & 0.07   & 1.18     & [-0.09, 2.45]  & T-test & two.sided   \\
+    AdjWPM & T0\_1 - T0\_2 & 2.44      & 0.02*  & 1.35     & [0.21, 2.50]   & T-test & two.sided   \\
+    KSPS   & T0\_1 - T0\_2 & -1.53     & 0.14   & -0.08    & [-0.19, 0.03]  & T-test & two.sided   \\
+    CER    & T0\_1 - T0\_2 & -3.54     & 0.00*  & -0.02    & [-0.03, -0.01] & T-test & two.sided   \\
+    TER    & T0\_1 - T0\_2 & -4.27     & 0.00*  & -0.02    & [-0.03, -0.01] & T-test & two.sided   \\
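+    \%MVC\textsubscript{LF} & T0\_1 - T0\_2 & 3.18      & 0.004* & 3.44     & [1.20, 5.68]   & T-test & two.sided   \\
+    \%MVC\textsubscript{LE} & T0\_1 - T0\_2 & 1.44      & 0.163  & 0.956    & [-0.42, 2.33]  & T-test & two.sided   \\
+
+    \%MVC\textsubscript{RF} & T0\_1 - T0\_2 & 3.18 & 0.004* & 3.44 & [1.20, 5.68] & T-test & two.sided \\ % TODO(review): values duplicate the LF row; the text reports a Wilcoxon test with p = 0.021 for the right flexor -- confirm
+    \%MVC\textsubscript{RE} & T0\_1 - T0\_2 & 3.18 & 0.004  & 3.44 & [1.20, 5.68] & T-test & two.sided \\ % TODO(review): values duplicate the LF row; the text reports no significant difference for the extensors -- confirm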
+    \bottomrule
+  \end{tabular}
+\end{table}
+
+\subsection{Performance Metrics}
+\label{sec:res_perf}
+\subsubsection{Typing Speed}
+\label{sec:res_typing_speed}
+The typing speed for each individual keyboard and typing test was automatically
+captured with the help of the typing test functionality offered by
+\glsfirst{GoTT}. We captured \gls{WPM}, \gls{AdjWPM} and
+\gls{KSPS} according to the formulas mentioned in Section
+\ref{sec:meas_perf}. The individual measurements were then converted into
+percentage values of the mean of the reference values gathered from typing tests
+with keyboard \textit{Own}. None of the gathered data for the individual
+treatments was distributed normally and thus, Friedman's Test was applied.
diff --git a/glossary.tex b/glossary.tex
index ebbb7c6..206f2dd 100644
--- a/glossary.tex
+++ b/glossary.tex
@@ -36,7 +36,11 @@
 \newacronym{OLED}{OLED}{organic light-emitting diode}
 \newacronym{GMMK}{GMMK}{Glorious Modular Mechanical Keyboards}
 
-\newacronym{ANOVA}{ANOVA}{analysis of variance}
+\newacronym{ANOVA}{ANOVA}{Analysis of Variance}
+\newacronym{Q-Q}{Q-Q}{quantile-quantile}
+\newacronym{OPC}{OPC}{percentage of keyboard ``Own''}
+
+
 
 \newglossaryentry{N}{
 name={N},
diff --git a/ref_shelf.bib b/ref_shelf.bib
index 893442e..865e796 100644
--- a/ref_shelf.bib
+++ b/ref_shelf.bib
@@ -872,4 +872,42 @@ urldate = {2021-07-06}
   pages={525--528},
   year={1958},
   publisher={Taylor \& Francis}
+}
+
+@article{holm_correction,
+ ISSN = {03036898, 14679469},
+ URL = {http://www.jstor.org/stable/4615733},
+ abstract = {This paper presents a simple and widely applicable multiple test procedure of the sequentially rejective type, i.e. hypotheses are rejected one at a time until no further rejections can be done. It is shown that the test has a prescribed level of significance protection against error of the first kind for any combination of true hypotheses. The power properties of the test and a number of possible applications are also discussed.},
+ author = {Sture Holm},
+ journal = {Scandinavian Journal of Statistics},
+ number = {2},
+ pages = {65--70},
+ publisher = {[Board of the Foundation of the Scandinavian Journal of Statistics, Wiley]},
+ title = {A Simple Sequentially Rejective Multiple Test Procedure},
+ volume = {6},
+ year = {1979}
+}
+
+@book{field_stats,
+  title={Discovering statistics using {R}},
+  author={Field, Andy and Miles, Jeremy and Field, Zo{\"e}},
+  year={2012},
+  publisher={Sage publications}
+}
+
+@book{downey_stats,
+  title={Think stats: exploratory data analysis},
+  author={Downey, Allen},
+  year={2014},
+  publisher={O'Reilly Media, Inc.}
+}
+
+@article{tavakol_cronbachs_alpha,
+  title={Making sense of {Cronbach's} alpha},
+  author={Tavakol, Mohsen and Dennick, Reg},
+  journal={International journal of medical education},
+  volume={2},
+  pages={53--55},
+  year={2011},
+  publisher={IJME}
 }
\ No newline at end of file