RogerBW's Blog: Perl Weekly Challenge 97: Caesar Substrings

I’ve been doing the Perl Weekly Challenges. The latest involved implementing the Caesar cipher and breaking up binary strings. (Note that this is open until 31 January 2021.)

TASK #1 › Caesar Cipher

You are given string $S containing alphabets A..Z only and a number $N.

Write a script to encrypt the given string $S using Caesar Cipher with left shift of size $N.

Left shift? How odd. Ah, it's the example on Wikipedia. Well, fair enough.

I see two basic approaches here: one is to map the string down to a series of char values, transform them, then bring them back up to make a string. One could do that. Instead, I built a hash to map from plaintext character to encrypted character, falling back to the character itself if there's no hash entry. I hardcoded the number of alphabet entries, but shifted this a bit later on in other languages.

sub cc {
  my $s=shift;
  my $n=shift;
  my @plain=('A'..'Z');
  my @cipher=(@plain,@plain);
  splice @cipher,0,(26*2-$n)%26;
  my %m=map {$plain[$_] => $cipher[$_]} (0..$#plain);
  return join('',map {$m{$_} || $_} split '',$s);
}

Raku is basically the same.

sub cc($s,$n) {
  my @plain=('A'..'Z');
  my @cipher=(@plain,@plain).flat;
  splice @cipher,0,(26*2-$n)%26;
  my %m=map {@plain[$_] => @cipher[$_]},(0..@plain.end);
  return (map {%m{$_} || $_},$s.comb()).join('');
}

In Python I measured the size of the character set (and discovered that I couldn't get a range of chars), and had to write out the hash tests in full.

def cc(s,n):
    plain=list([chr(i) for i in range(ord('A'),ord('Z')+1)])
    cmap=dict()
    asize=len(plain)
    for p in range(asize):
        c=(p+asize*2-n) % asize
        cmap[plain[p]]=plain[c]
    out=""
    for i in range(len(s)):
        if s[i] in cmap:
            out += cmap[s[i]]
        else:
            out += s[i]
    return out

Ruby was pretty much a reimplementation of Python.

def cc(s,n)
  plain=('A'..'Z').to_a
  cmap=Hash.new
  asize=plain.length
  0.upto(asize-1) do |p|
    c=(p+asize*2-n) % asize
    cmap[plain[p]]=plain[c]
  end
  out=""
  s.split('').each do |ss|
    if cmap.has_key?(ss) then
      out += cmap[ss]
    else
      out += ss
    end
  end
  return out
end

As was Rust, with a bit of fiddling back and forth between usize and other types. (usize, after all, can't go negative…)

fn cc(s: &str, n: i8) -> String {
    let plain=(b'A'..=b'Z').map(|b| b as char).collect::<Vec<char>>();
    let mut cmap: HashMap<char,char>=HashMap::new();
    let asize=plain.len();
    for p in 0..asize {
        let c=(((p as i8)+(asize as i8)*2-n) % (asize as i8)) as usize;
        cmap.insert(plain[p],plain[c]);
    }
    let mut out: String="".to_string();
    for ss in s.chars() {
        if cmap.contains_key(&ss) {
            out.push(*cmap.get(&ss).unwrap());
        } else {
            out.push(ss);
        }
    }
    return out.to_string();
}

TASK #2 › Binary Substrings

You are given a binary string $B and an integer $S.

Write a script to split the binary string $B of size $S and then find the minimum number of flips required to make it all the same.

("Binary string" means here "string consisting of a series of 1 and 0 characters".)

Splitting a string to something other than characters or words. There's unusual.

The fiddly bit here is that they don't all have to become the same as the first one; any one string might be the one into which they all get flipped (consider for example the series 000, 111, 111, 111 which should return 3 not 9). So I ended up calculating an edit distance between each pair of items (much more the usual sort of thing than last week's Levenshtein), and cacheing them because the distance from A to B will be the same as from B to A. The cheapest row of edits is the one to use.

sub bs {
  my $b=shift;
  my $s=shift;
  my @bb=grep /./,split /(.{$s})/,$b;
  if (length($bb[-1]) != $s) {
    return -1;
  }
  my $mc;
  my %cost;
  foreach my $x (0..$#bb-1) {
    $cost{$x}{$x}=0;
    foreach my $y ($x+1..$#bb) {
      $cost{$x}{$y}=$cost{$y}{$x}=diff($bb[$x],$bb[$y]);
    }
    my $tc=sum(map {$cost{$x}{$_}} 0..$#bb);
    if (!defined $mc || $tc < $mc) {
      $mc=$tc;
    }
  }
  return $mc;
}

That diff function would be a good candidate for cacheing too. Or optimisation if we were using real binary - XOR the two arguments, then bit-count the result.

sub diff {
  my ($a,$b)=@_;
  my @ac=split '',$a;
  my @bc=split '',$b;
  my $d=0;
  foreach my $i (0..$#ac) {
    if ($ac[$i] ne $bc[$i]) {
      $d++;
    }
  }
  return $d;
}

Raku is basically the same here, with some tweaks for syntax. (And comb(n) splits a string into n-sizes units.)

sub bs($b,$s) {
  my @bb=$b.comb($s);
  if (chars(@bb[@bb.end]) != $s) {
    return -1;
  }
  my $mc=-1;
  my %cost;
  for (0..@bb.end-1) -> $x {
    %cost{$x}{$x}=0;
    for ($x+1..@bb.end) -> $y {
      %cost{$x}{$y}=%cost{$y}{$x}=diff(@bb[$x],@bb[$y]);
    }
    my $tc=sum(map {%cost{$x}{$_}},0..@bb.end);
    if ($mc==-1 || $tc < $mc) {
      $mc=$tc;
    }
  }
  return $mc;
}

sub diff($a,$b) {
  my @ac=$a.comb;
  my @bc=$b.comb;
  my $d=0;
  for (0..@ac.end) -> $i {
    if (@ac[$i] ne @bc[$i]) {
      $d++;
    }
  }
  return $d;
}

For Python I worked out the string slicing in full rather than using a map, and used defaultdict to get an autovivified hash of hashes; string slicing came free.

def diff(a,b):
    d=0
    for i in range(len(a)):
        if a[i] != b[i]:
            d += 1
    return d

def bs(b,s):
    bb=list()
    i=0
    while i<len(b):
        bb.append(b[i:i+s])
        i+=s
    if len(bb[len(bb)-1]) != s:
        return -1
    mc=-1
    cost=defaultdict(dict)
    for x in range(len(bb)-1):
        cost[x][x]=0
        for y in range(x+1,len(bb)):
            t=diff(bb[x],bb[y])
            cost[x][y]=t
            cost[y][x]=t
        tc=sum(cost[x][i] for i in range(len(bb)))
        if mc==-1 or tc < mc:
            mc=tc
    return mc

Ruby was more or less the same as python, except I put in traps to initialise the inner hashes just before they were used.

def diff(a,b)
  aa=a.split('')
  bb=b.split('')
  d=0
  0.upto(aa.length()-1) do |i|
    if a[i] != b[i]
      d+=1
    end
  end
  return d
end

def bs(b,s)
  bb=Array.new
  i=0
  while (i < b.length) do
    bb.push(b.slice(i,s))
    i+=s
  end
  if bb[-1].length != s then
    return -1
  end
  mc=-1
  cost=Hash.new
  0.upto(bb.length-2) do |x|
    if !cost.has_key?(x) then
      cost[x]=Hash.new
    end
    cost[x][x]=0
    x+1.upto(bb.length-1) do |y|
      cost[x][y]=diff(bb[x],bb[y])
      if !cost.has_key?(y) then
        cost[y]=Hash.new
      end
      cost[y][x]=cost[x][y]
    end
    tc=0.upto(bb.length-1).map{|i| cost[x][i]}.sum()
    if mc==-1 or tc < mc then
      mc=tc
    end
  end
  return mc
end

And in Rust I ended up not using hashes at all, but vectors. (I could of course have used arrays everywhere else, but Perl's autovivification was the easy way to approach it, and the code held onto that as I morphed it into other languages.) (And note how similar the calculation of tc is to the lovely Ruby version, as opposed to the comparative ugliness of Python, Raku and Perl. I do like a simple left-to-right control flow.)

fn diff(a: &str,b: &str) -> i64 {
    let mut d: i64=0;
    for (s,t) in a.chars().zip(b.chars()) {
        if s != t {
            d+=1;
        }
    }
    return d;
}

fn bs(b: &str, s: i64) -> i64 {
    let mut bb: Vec<String>=vec![];
    let mut i=0;
    while i<b.len() {
        bb.push(b[i..i+(s as usize)].to_string());
        i+=s as usize;
    }
    if bb[bb.len()-1].len() != s as usize {
        return -1;
    }
    let mut mc: i64 = -1;
    let mut cost=vec![vec![0; bb.len()]; bb.len()];
    for x in 0..bb.len()-1 {
        cost[x][x]=0;
        for y in x+1..bb.len() {
            let t=diff(&bb[x],&bb[y]);
            cost[x][y]=t;
            cost[y][x]=t;
        }
        let tc=(0..bb.len()).map(|i| cost[x][i]).sum();
        if mc==-1 || tc < mc {
            mc=tc;
        }
    }
    return mc;
}

Full code on github.

Comments on this post are now closed. If you have particular grounds for adding a late comment, comment on a more recent post quoting the URL of this one.