ec_multisymbol: Split off a new experiment, new_tokenset

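The new_tokenset experiment replaces the unconstrained tokenset with a
multisymbol alphabet in an inventive way.

tools/gen_constrained_tokenset.py now takes an optional second
command-line argument, first_token (default 1), which is passed through
to get_quantized_spareto().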

Tested configurations:
new_tokenset + ec_adapt, new_tokenset, ec_multisymbol

Change-Id: I846ab2e51c2a1dc3f2f9904ed8c47a8e98f853c5
diff --git a/tools/gen_constrained_tokenset.py b/tools/gen_constrained_tokenset.py
index d3cbf8d..5d12ee1 100755
--- a/tools/gen_constrained_tokenset.py
+++ b/tools/gen_constrained_tokenset.py
@@ -93,25 +93,28 @@
   return q
 
 
-def get_quantized_spareto(p, beta, bits):
+def get_quantized_spareto(p, beta, bits, first_token):
   parray = get_spareto(p, beta)
   parray = parray[1:] / (1 - parray[0])
-#if CONFIG_EC_MULTISYMBOL, truncate the array again
-  tarray = parray[1:] / (1 - parray[0])
-  qarray = quantize_probs(tarray, False, bits)
+  # CONFIG_NEW_TOKENSET
+  if first_token > 1:
+    parray = parray[1:] / (1 - parray[0])
+  qarray = quantize_probs(parray, first_token == 1, bits)
   return qarray.astype(np.int)
 
 
-def main(bits=15):
+def main(bits=15, first_token=1):
   beta = 8
   for q in range(1, 256):
-    parray = get_quantized_spareto(q / 256., beta, bits)
+    parray = get_quantized_spareto(q / 256., beta, bits, first_token)
     assert parray.sum() == 2**bits
     print '{', ', '.join('%d' % i for i in parray), '},'
 
 
 if __name__ == '__main__':
-  if len(sys.argv) > 1:
+  if len(sys.argv) > 2:
+    main(int(sys.argv[1]), int(sys.argv[2]))
+  elif len(sys.argv) > 1:
     main(int(sys.argv[1]))
   else:
     main()