XiaoYun Zhang committed on
Commit
750c19e
1 Parent(s): dccd8ef

Use the tokenizer from ML.NET (Microsoft.ML.Tokenizers)

Browse files
Files changed (4) hide show
  1. Program.cs +12 -19
  2. clip.csproj +9 -2
  3. merges.txt +0 -0
  4. vocab.json +0 -0
Program.cs CHANGED
@@ -1,35 +1,28 @@
1
- using System;
 
 
2
  using System.Collections.Generic;
3
  using System.IO;
4
  using System.Linq;
5
  using TorchSharp;
6
 
7
- torchvision.io.DefaultImager = new torchvision.io.SkiaImager();
8
- var device = TorchSharp.torch.device("cuda:0");
9
- var clipEncoder = new ClipEncoder("clip_encoder.ckpt", device);
10
  var start_token = 49406;
11
  var end_token = 49407;
12
- var dictionary = new Dictionary<string, long>(){
13
- {"cat", 2368},
14
- {"a", 320},
15
- {"cute", 2242},
16
- {"blue", 1746},
17
- {"wild", 3220},
18
- {"green", 1901},
19
- };
20
-
21
- var batch = 1;
22
-
23
  var prompt = "a wild cute green cat";
24
- var tokens = prompt.Split(' ').Select(x => dictionary[x]).ToList();
25
- tokens = tokens.Prepend(start_token).ToList();
26
- tokens = tokens.Append(end_token).ToList();
27
- tokens = tokens.Concat(Enumerable.Repeat<long>(0, 77 - tokens.Count)).ToList();
28
  var uncontional_tokens = new[]{start_token, end_token}.Concat(Enumerable.Repeat(0, 75)).ToList();
29
  var tokenTensor = torch.tensor(tokens.ToArray(), dtype: torch.ScalarType.Int64, device: device);
30
  tokenTensor = tokenTensor.repeat(batch, 1);
31
  var unconditional_tokenTensor = torch.tensor(uncontional_tokens.ToArray(), dtype: torch.ScalarType.Int64, device: device);
32
  unconditional_tokenTensor = unconditional_tokenTensor.repeat(batch, 1);
 
 
 
 
33
  var img = torch.randn(batch, 4, 64, 64, dtype: torch.ScalarType.Float32, device: device);
34
  var t = torch.full(new[]{batch, 1L}, value: batch, dtype: torch.ScalarType.Int32, device: device);
35
  var condition = clipEncoder.Forward(tokenTensor);
 
1
+ using Microsoft.ML;
2
+ using Microsoft.ML.Tokenizers;
3
+ using System;
4
  using System.Collections.Generic;
5
  using System.IO;
6
  using System.Linq;
7
  using TorchSharp;
8
 
9
+ var batch = 1;
10
+ var bpe = new Bpe("vocab.json", "merges.txt", endOfWordSuffix: "</w>");
11
+ var tokenier = new Tokenizer(bpe);
12
  var start_token = 49406;
13
  var end_token = 49407;
 
 
 
 
 
 
 
 
 
 
 
14
  var prompt = "a wild cute green cat";
15
+ var res = tokenier.Encode(prompt);
16
+ var tokens = new[] { start_token }.Concat(res.Ids.Concat(Enumerable.Repeat(0, 75 - res.Ids.Count))).Concat(new[] { end_token }).ToList();
 
 
17
  var uncontional_tokens = new[]{start_token, end_token}.Concat(Enumerable.Repeat(0, 75)).ToList();
18
  var tokenTensor = torch.tensor(tokens.ToArray(), dtype: torch.ScalarType.Int64, device: device);
19
  tokenTensor = tokenTensor.repeat(batch, 1);
20
  var unconditional_tokenTensor = torch.tensor(uncontional_tokens.ToArray(), dtype: torch.ScalarType.Int64, device: device);
21
  unconditional_tokenTensor = unconditional_tokenTensor.repeat(batch, 1);
22
+
23
+ torchvision.io.DefaultImager = new torchvision.io.SkiaImager();
24
+ var device = TorchSharp.torch.device("cuda:0");
25
+ var clipEncoder = new ClipEncoder("clip_encoder.ckpt", device);
26
  var img = torch.randn(batch, 4, 64, 64, dtype: torch.ScalarType.Float32, device: device);
27
  var t = torch.full(new[]{batch, 1L}, value: batch, dtype: torch.ScalarType.Int32, device: device);
28
  var condition = clipEncoder.Forward(tokenTensor);
clip.csproj CHANGED
@@ -9,12 +9,19 @@
9
  </PropertyGroup>
10
 
11
  <ItemGroup>
 
 
12
  <PackageReference Include="TorchVision" Version="$(TorchVersion)" />
13
  <PackageReference Include="TorchSharp-cuda-linux" Version="$(TorchVersion)" />
14
- <None Update="*.ckpt">
 
 
 
 
 
 
15
  <CopyToOutputDirectory>Always</CopyToOutputDirectory>
16
  </None>
17
- <PackageReference Include="Microsoft.ML" Version="2.0.1" />
18
  </ItemGroup>
19
 
20
  </Project>
 
9
  </PropertyGroup>
10
 
11
  <ItemGroup>
12
+ <PackageReference Include="Microsoft.ML" Version="2.0.1" />
13
+ <PackageReference Include="Microsoft.ML.Tokenizers" Version="0.20.1" />
14
  <PackageReference Include="TorchVision" Version="$(TorchVersion)" />
15
  <PackageReference Include="TorchSharp-cuda-linux" Version="$(TorchVersion)" />
16
+ <None Update="*.ckpt">
17
+ <CopyToOutputDirectory>Always</CopyToOutputDirectory>
18
+ </None>
19
+ <None Update="merges.txt">
20
+ <CopyToOutputDirectory>Always</CopyToOutputDirectory>
21
+ </None>
22
+ <None Update="vocab.json">
23
  <CopyToOutputDirectory>Always</CopyToOutputDirectory>
24
  </None>
 
25
  </ItemGroup>
26
 
27
  </Project>
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff