51316a1a9c67895425d1457e00214f2442dd4fd1 — Daniël de Kok 3 months ago d0f4cf0
Restructure encoder modules

- The top-level depparse module is renamed to encoder::deprel.
- The Layer{Encoder,Decoder} are moved to the encoder::layer module.
17 files changed, 118 insertions(+), 99 deletions(-)

M sticker-utils/src/bin/sticker-dep2label.rs
M sticker-utils/src/bin/sticker-prepare.rs
M sticker-utils/src/bin/sticker-pretrain.rs
M sticker-utils/src/bin/sticker-train.rs
M sticker-utils/src/serialization.rs
M sticker-utils/src/tagger_wrapper.rs
M sticker/src/collector.rs
R sticker/src/{depparse => encoder/deprel}/errors.rs
R sticker/src/{depparse => encoder/deprel}/mod.rs
R sticker/src/{depparse => encoder/deprel}/post_processing.rs
R sticker/src/{depparse => encoder/deprel}/relative_pos.rs
R sticker/src/{depparse => encoder/deprel}/relative_position.rs
A sticker/src/encoder/layer/mod.rs
R sticker/src/{encoder.rs => encoder/mod.rs}
M sticker/src/lib.rs
M sticker/src/tensorflow/dataset.rs
M sticker/src/tensorflow/tagger.rs
M sticker-utils/src/bin/sticker-dep2label.rs => sticker-utils/src/bin/sticker-dep2label.rs +2 -2
@@ 7,8 7,8 @@
 use conllx::token::Features;
 use stdinout::{Input, OrExit, Output};
 
-use sticker::depparse::{RelativePOSEncoder, RelativePositionEncoder};
-use sticker::SentenceEncoder;
+use sticker::encoder::deprel::{RelativePOSEncoder, RelativePositionEncoder};
+use sticker::encoder::SentenceEncoder;
 
 static ENCODER: &str = "ENCODER";
 static INPUT: &str = "INPUT";

M sticker-utils/src/bin/sticker-prepare.rs => sticker-utils/src/bin/sticker-prepare.rs +4 -4
@@ 9,10 9,10 @@
 use serde_derive::Serialize;
 use stdinout::{Input, OrExit, Output};
 
-use sticker::depparse::{RelativePOSEncoder, RelativePositionEncoder};
-use sticker::{
-    Collector, Embeddings, LayerEncoder, NoopCollector, Numberer, SentVectorizer, SentenceEncoder,
-};
+use sticker::encoder::deprel::{RelativePOSEncoder, RelativePositionEncoder};
+use sticker::encoder::layer::LayerEncoder;
+use sticker::encoder::SentenceEncoder;
+use sticker::{Collector, Embeddings, NoopCollector, Numberer, SentVectorizer};
 use sticker_utils::{sticker_app, CborWrite, Config, EncoderType, LabelerType, TomlRead};
 
 static CONFIG: &str = "CONFIG";

M sticker-utils/src/bin/sticker-pretrain.rs => sticker-utils/src/bin/sticker-pretrain.rs +4 -2
@@ 8,9 8,11 @@
 use indicatif::ProgressStyle;
 use ordered_float::NotNan;
 use stdinout::OrExit;
-use sticker::depparse::{RelativePOSEncoder, RelativePositionEncoder};
+use sticker::encoder::deprel::{RelativePOSEncoder, RelativePositionEncoder};
+use sticker::encoder::layer::LayerEncoder;
+use sticker::encoder::{CategoricalEncoder, SentenceEncoder};
 use sticker::tensorflow::{ConllxDataSet, DataSet, TaggerGraph, TaggerTrainer};
-use sticker::{CategoricalEncoder, LayerEncoder, Numberer, SentVectorizer, SentenceEncoder};
+use sticker::{Numberer, SentVectorizer};
 use sticker_utils::{
     sticker_app, CborRead, CompletedUnit, Config, EncoderType, LabelerType, ReadProgress,
     SaveSchedule, SaveScheduler, TomlRead,

M sticker-utils/src/bin/sticker-train.rs => sticker-utils/src/bin/sticker-train.rs +4 -2
@@ 8,11 8,13 @@
 use indicatif::ProgressStyle;
 use ordered_float::NotNan;
 use stdinout::OrExit;
-use sticker::depparse::{RelativePOSEncoder, RelativePositionEncoder};
+use sticker::encoder::deprel::{RelativePOSEncoder, RelativePositionEncoder};
+use sticker::encoder::layer::LayerEncoder;
+use sticker::encoder::{CategoricalEncoder, SentenceEncoder};
 use sticker::tensorflow::{
     ConllxDataSet, DataSet, LearningRateSchedule, PlateauLearningRate, TaggerGraph, TaggerTrainer,
 };
-use sticker::{CategoricalEncoder, LayerEncoder, Numberer, SentVectorizer, SentenceEncoder};
+use sticker::{Numberer, SentVectorizer};
 use sticker_utils::{
     sticker_app, CborRead, CompletedUnit, Config, EncoderType, LabelerType, ReadProgress,
     SaveSchedule, SaveScheduler, TomlRead,

M sticker-utils/src/serialization.rs => sticker-utils/src/serialization.rs +1 -1
@@ 1,7 1,7 @@
 use std::io::{Read, Write};
 
 use failure::Error;
-use sticker::depparse::{DependencyEncoding, RelativePOS, RelativePosition};
+use sticker::encoder::deprel::{DependencyEncoding, RelativePOS, RelativePosition};
 use sticker::Numberer;
 
 use serde_cbor;

M sticker-utils/src/tagger_wrapper.rs => sticker-utils/src/tagger_wrapper.rs +4 -2
@@ 6,10 6,12 @@
 
 use crate::config::{Config, EncoderType, LabelerType};
 use crate::serialization::CborRead;
-use sticker::depparse::{RelativePOSEncoder, RelativePositionEncoder};
+use sticker::encoder::deprel::{RelativePOSEncoder, RelativePositionEncoder};
+use sticker::encoder::layer::LayerEncoder;
+use sticker::encoder::{CategoricalEncoder, SentenceDecoder};
 use sticker::tensorflow::{Tagger, TaggerGraph};
 use sticker::Tag;
-use sticker::{CategoricalEncoder, LayerEncoder, Numberer, SentVectorizer, SentenceDecoder};
+use sticker::{Numberer, SentVectorizer};
 
 /// The `Tag` trait is not object-safe, since the `tag_sentences`
 /// method has a type parameter to accept a slice of mutably

M sticker/src/collector.rs => sticker/src/collector.rs +2 -1
@@ 3,7 3,8 @@
 use conllx::graph::Sentence;
 use failure::Error;
 
-use crate::{Numberer, SentVectorizer, SentenceEncoder};
+use crate::encoder::SentenceEncoder;
+use crate::{Numberer, SentVectorizer};
 
 /// Data types collects (and typically stores) vectorized sentences.
 pub trait Collector {

R sticker/src/depparse/errors.rs => sticker/src/encoder/deprel/errors.rs +0 -0

R sticker/src/depparse/mod.rs => sticker/src/encoder/deprel/mod.rs +7 -5
@@ 1,16 1,18 @@
+//! Dependency relation encoders.
+
 use serde_derive::{Deserialize, Serialize};
 
 mod errors;
-pub use crate::depparse::errors::*;
+pub use self::errors::*;
 
 mod post_processing;
-pub(crate) use crate::depparse::post_processing::*;
+pub(crate) use self::post_processing::*;
 
 mod relative_position;
-pub use crate::depparse::relative_position::*;
+pub use self::relative_position::*;
 
 mod relative_pos;
-pub use crate::depparse::relative_pos::*;
+pub use self::relative_pos::*;
 
 /// Encoding of a dependency relation as a token label.
 #[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]


@@ 41,7 43,7 @@
     use conllx::io::Reader;
 
     use super::{RelativePOSEncoder, RelativePositionEncoder};
-    use crate::{EncodingProb, SentenceDecoder, SentenceEncoder};
+    use crate::encoder::{EncodingProb, SentenceDecoder, SentenceEncoder};
 
     static NON_PROJECTIVE_DATA: &'static str = "testdata/nonprojective.conll";
 

R sticker/src/depparse/post_processing.rs => sticker/src/encoder/deprel/post_processing.rs +3 -3
@@ 3,7 3,7 @@
 use petgraph::algo::tarjan_scc;
 
 use super::DependencyEncoding;
-use crate::EncodingProb;
+use crate::encoder::EncodingProb;
 
 static ROOT_RELATION: &'static str = "ROOT";
 


@@ 158,10 158,10 @@
     use conllx::token::TokenBuilder;
 
     use super::{attach_orphans, break_cycles, find_or_create_root, first_root, ROOT_RELATION};
-    use crate::depparse::{
+    use crate::encoder::deprel::{
         pos_position_table, DependencyEncoding, RelativePOS, RelativePOSEncoder,
     };
-    use crate::{EncodingProb, SentenceEncoder};
+    use crate::encoder::{EncodingProb, SentenceEncoder};
 
     fn test_graph() -> Sentence {
         let mut sent = Sentence::new();

R sticker/src/depparse/relative_pos.rs => sticker/src/encoder/deprel/relative_pos.rs +3 -3
@@ 7,7 7,7 @@
 use super::{
     attach_orphans, break_cycles, find_or_create_root, DecodeError, DependencyEncoding, EncodeError,
 };
-use crate::{EncodingProb, SentenceDecoder, SentenceEncoder};
+use crate::encoder::{EncodingProb, SentenceDecoder, SentenceEncoder};
 
 /// Relative head position by part-of-speech.
 ///


@@ 241,8 241,8 @@
     use conllx::token::TokenBuilder;
 
     use super::{RelativePOS, RelativePOSEncoder};
-    use crate::depparse::{DecodeError, DependencyEncoding};
-    use crate::{EncodingProb, SentenceDecoder};
+    use crate::encoder::deprel::{DecodeError, DependencyEncoding};
+    use crate::encoder::{EncodingProb, SentenceDecoder};
 
     // Small tests for relative part-of-speech encoder. Automatic
     // testing is performed in the module tests.

R sticker/src/depparse/relative_position.rs => sticker/src/encoder/deprel/relative_position.rs +3 -3
@@ 5,7 5,7 @@
 use super::{
     attach_orphans, break_cycles, find_or_create_root, DecodeError, DependencyEncoding, EncodeError,
 };
-use crate::{EncodingProb, SentenceDecoder, SentenceEncoder};
+use crate::encoder::{EncodingProb, SentenceDecoder, SentenceEncoder};
 
 /// Relative head position.
 ///


@@ 119,8 119,8 @@
     use conllx::token::TokenBuilder;
 
     use super::{RelativePosition, RelativePositionEncoder};
-    use crate::depparse::{DecodeError, DependencyEncoding};
-    use crate::{EncodingProb, SentenceDecoder};
+    use crate::encoder::deprel::{DecodeError, DependencyEncoding};
+    use crate::encoder::{EncodingProb, SentenceDecoder};
 
     // Small tests for the relative position encoder. Automatic
     // testing is performed in the module tests.

A sticker/src/encoder/layer/mod.rs => sticker/src/encoder/layer/mod.rs +64 -0
@@ 0,0 1,64 @@
+//! CoNLL-X layer encoder.
+
+use conllx::graph::{Node, Sentence};
+use failure::{format_err, Error};
+
+use super::{EncodingProb, SentenceDecoder, SentenceEncoder};
+use crate::{Layer, LayerValue};
+
+/// Encode sentences using a CoNLL-X layer.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct LayerEncoder {
+    layer: Layer,
+}
+
+impl LayerEncoder {
+    /// Construct a new layer encoder of the given layer.
+    pub fn new(layer: Layer) -> Self {
+        LayerEncoder { layer }
+    }
+}
+
+impl SentenceDecoder for LayerEncoder {
+    type Encoding = String;
+
+    fn decode<'a, S>(&self, labels: &[S], sentence: &mut Sentence) -> Result<(), Error>
+    where
+        S: AsRef<[EncodingProb<'a, Self::Encoding>]>,
+        Self::Encoding: 'a,
+    {
+        assert_eq!(
+            labels.len(),
+            sentence.len() - 1,
+            "Labels and sentence length mismatch"
+        );
+
+        for (token, token_labels) in sentence
+            .iter_mut()
+            .filter_map(Node::token_mut)
+            .zip(labels.iter())
+        {
+            if let Some(label) = token_labels.as_ref().get(0) {
+                token.set_value(&self.layer, label.encoding().as_str());
+            }
+        }
+
+        Ok(())
+    }
+}
+
+impl SentenceEncoder for LayerEncoder {
+    type Encoding = String;
+
+    fn encode(&mut self, sentence: &Sentence) -> Result<Vec<Self::Encoding>, Error> {
+        let mut encoding = Vec::with_capacity(sentence.len() - 1);
+        for token in sentence.iter().filter_map(Node::token) {
+            let label = token
+                .value(&self.layer)
+                .ok_or_else(|| format_err!("Token without a label: {}", token.form()))?;
+            encoding.push(label.to_owned());
+        }
+
+        Ok(encoding)
+    }
+}

R sticker/src/encoder.rs => sticker/src/encoder/mod.rs +12 -62
@@ 1,10 1,16 @@
+//! Label encoders.
+
 use std::borrow::{Borrow, Cow};
 use std::hash::Hash;
 
-use conllx::graph::{Node, Sentence};
-use failure::{format_err, Error};
+use conllx::graph::Sentence;
+use failure::Error;
+
+use crate::Numberer;
 
-use crate::{Layer, LayerValue, Numberer};
+pub mod deprel;
+
+pub mod layer;
 
 /// An encoding with its probability.
 pub struct EncodingProb<'a, E>


@@ 80,63 86,6 @@
     fn encode(&mut self, sentence: &Sentence) -> Result<Vec<Self::Encoding>, Error>;
 }
 
-/// Encode sentences using a CoNLL-X layer.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct LayerEncoder {
-    layer: Layer,
-}
-
-impl LayerEncoder {
-    /// Construct a new layer encoder of the given layer.
-    pub fn new(layer: Layer) -> Self {
-        LayerEncoder { layer }
-    }
-}
-
-impl SentenceDecoder for LayerEncoder {
-    type Encoding = String;
-
-    fn decode<'a, S>(&self, labels: &[S], sentence: &mut Sentence) -> Result<(), Error>
-    where
-        S: AsRef<[EncodingProb<'a, Self::Encoding>]>,
-        Self::Encoding: 'a,
-    {
-        assert_eq!(
-            labels.len(),
-            sentence.len() - 1,
-            "Labels and sentence length mismatch"
-        );
-
-        for (token, token_labels) in sentence
-            .iter_mut()
-            .filter_map(Node::token_mut)
-            .zip(labels.iter())
-        {
-            if let Some(label) = token_labels.as_ref().get(0) {
-                token.set_value(&self.layer, label.encoding().as_str());
-            }
-        }
-
-        Ok(())
-    }
-}
-
-impl SentenceEncoder for LayerEncoder {
-    type Encoding = String;
-
-    fn encode(&mut self, sentence: &Sentence) -> Result<Vec<Self::Encoding>, Error> {
-        let mut encoding = Vec::with_capacity(sentence.len() - 1);
-        for token in sentence.iter().filter_map(Node::token) {
-            let label = token
-                .value(&self.layer)
-                .ok_or_else(|| format_err!("Token without a label: {}", token.form()))?;
-            encoding.push(label.to_owned());
-        }
-
-        Ok(encoding)
-    }
-}
-
 /// An encoder wrapper that encodes/decodes to a categorical label.
 pub struct CategoricalEncoder<E, V>
 where


@@ 213,8 162,9 @@
 
     use conllx::io::Reader;
 
-    use super::{CategoricalEncoder, LayerEncoder};
-    use crate::{EncodingProb, Layer, Numberer, SentenceDecoder, SentenceEncoder};
+    use super::layer::LayerEncoder;
+    use super::{CategoricalEncoder, EncodingProb, SentenceDecoder, SentenceEncoder};
+    use crate::{Layer, Numberer};
 
     static NON_PROJECTIVE_DATA: &'static str = "testdata/nonprojective.conll";
 

M sticker/src/lib.rs => sticker/src/lib.rs +1 -6
@@ 1,12 1,7 @@
 mod collector;
 pub use crate::collector::{Collector, NoopCollector};
 
-mod encoder;
-pub use crate::encoder::{
-    CategoricalEncoder, EncodingProb, LayerEncoder, SentenceDecoder, SentenceEncoder,
-};
-
-pub mod depparse;
+pub mod encoder;
 
 mod input;
 pub use crate::input::{Embeddings, LayerEmbeddings, SentVectorizer};

M sticker/src/tensorflow/dataset.rs => sticker/src/tensorflow/dataset.rs +2 -1
@@ 6,7 6,8 @@
 use failure::Fallible;
 
 use super::tensor::{LabelTensor, TensorBuilder};
-use crate::{CategoricalEncoder, SentVectorizer, SentenceEncoder};
+use crate::encoder::{CategoricalEncoder, SentenceEncoder};
+use crate::SentVectorizer;
 
 /// A set of training/validation data.
 ///

M sticker/src/tensorflow/tagger.rs => sticker/src/tensorflow/tagger.rs +2 -2
@@ 19,8 19,8 @@
 
 use super::tensor::{NoLabels, TensorBuilder};
 use super::util::{prepare_path, status_to_error};
-use crate::encoder::{CategoricalEncoder, SentenceDecoder};
-use crate::{EncodingProb, SentVectorizer, Tag};
+use crate::encoder::{CategoricalEncoder, EncodingProb, SentenceDecoder};
+use crate::{SentVectorizer, Tag};
 
 #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
 #[serde(deny_unknown_fields)]