|
@ -0,0 +1,56 @@
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
class Comment:
    """A single comment parsed from one tab-separated row.

    Only the first four columns of the row are read, in this layout:
    column 0 is the comment id, column 1 the author id, column 2 the parent
    comment id and column 3 the body text.  A parent column holding a
    backslash followed by a capital N means "no parent".

    Every instance registers itself in the class-level ``by_id`` mapping as
    a side effect of construction, so later code can look comments up by id.
    """

    # Registry of every Comment constructed so far, keyed by comment_id.
    # It lives on the class, so it is shared by all instances.
    by_id = {}

    def __init__(self, line):
        """Parse *line* (one tab-separated row) and register the comment.

        Raises:
            IndexError: if the row has fewer than four columns.
            ValueError: if an id column is not an integer.
        """
        fields = line.split("\t")

        self.comment_id = int(fields[0])

        # The two-character text backslash-N marks a missing parent.
        raw_parent = fields[2]
        self.parent_id = None if raw_parent == "\\N" else int(raw_parent)

        self.author_id = int(fields[1])

        # Line breaks arrive as literal backslash escapes; turn them into
        # real newlines (the CRLF form first, so it collapses to a single
        # newline), then trim surrounding whitespace, including the row's
        # own trailing newline.
        body = fields[3].replace("\\r\\n", "\n").replace("\\n", "\n")
        self.body = body.strip()

        # Tree links start empty; nothing in this class fills them in.
        self.parent = None
        self.children = []

        self.by_id[self.comment_id] = self
|
||||||
|
|
||||||
|
|
||||||
|
# Parse every row of the export into a Comment.  Iterating the file object
# streams it line by line; rows that are empty or whitespace-only are
# skipped because they cannot be parsed into ids.
with open("drama.csv") as f:
    comments = [Comment(l) for l in f if l.strip()]

# Wire up the reply tree now that every comment is registered in
# Comment.by_id.
for c in comments:
    # Compare against None explicitly so that an id of 0 still counts as
    # "has a parent".
    if c.parent_id is None:
        continue

    parent = Comment.by_id.get(c.parent_id)
    if parent is None:
        # The parent is not in the loaded data.  Mark the comment with -1
        # so it is neither linked into a tree nor mistaken for a top-level
        # comment (those keep parent_id None).
        c.parent_id = -1
        continue

    c.parent = parent
    parent.children.append(c)
|
||||||
|
|
||||||
|
|
||||||
|
def render_threads(c):
    """Render every root-to-leaf reply chain below *c* as one text blob.

    Each comment on a chain contributes a header line holding its
    ``author_id`` followed by its ``body`` and a blank line.  A comment
    without children yields exactly one thread containing only itself.

    The walk uses an explicit stack instead of recursion, so the depth of a
    reply chain is not limited by the interpreter's recursion limit.

    Args:
        c: an object with ``author_id``, ``body`` and ``children``
            attributes, where ``children`` is a sequence of such objects.

    Returns:
        A list of strings, one per leaf, in depth-first, left-to-right
        order.
    """
    threads = []

    # Each stack entry pairs a comment with the text rendered for all of
    # its ancestors.
    pending = [(c, "")]
    while pending:
        node, prefix = pending.pop()

        # TODO: use a more unique separator in the future that doesn't
        # conflict with Markdown.
        text = prefix + f"--- {node.author_id} ---\n{node.body}\n\n"

        if not node.children:
            threads.append(text)
            continue

        # Push in reverse so the first child is popped, and therefore
        # rendered, first.
        for child in reversed(node.children):
            pending.append((child, text))

    return threads
|
||||||
|
|
||||||
|
|
||||||
|
# Gather the rendered threads of every eligible top-level comment.
threads = []
for c in comments:
    # Eligible means: no parent, not written by author 261, and at least
    # one reply.
    # NOTE(review): the reason author 261 is excluded is not visible here;
    # presumably an automated account, confirm.
    if c.parent_id is not None or c.author_id == 261 or not c.children:
        continue
    threads.extend(t.strip() for t in render_threads(c))

# Shuffle in place so the threads are not written in file order.
np.random.shuffle(threads)

# Write everything as one document, with the end-of-text token between
# consecutive threads.
with open("train.txt", "w") as f:
    f.write("<|endoftext|>".join(threads))
|
|
@ -0,0 +1,3 @@
|
||||||
|
* [Fine-tuning guide](https://github.com/kingoflolz/mesh-transformer-jax/blob/master/howto_finetune.md)
|
||||||
|
* [Updated config instructions for the TPU machine](https://github.com/kingoflolz/mesh-transformer-jax/issues/202#issuecomment-1050887576)
|
||||||
|
* Fits on a free-tier Oracle Cloud machine with 24 GB of RAM and 16 GB of swap.
|
|
@ -0,0 +1,3 @@
|
||||||
|
* [Fine-tuning guide](https://github.com/kingoflolz/mesh-transformer-jax/blob/master/howto_finetune.md)
|
||||||
|
* [Updated config instructions for the TPU machine](https://github.com/kingoflolz/mesh-transformer-jax/issues/202#issuecomment-1050887576)
|
||||||
|
* Fits on a free-tier Oracle Cloud machine with 24 GB of RAM and 16 GB of swap.
|
After Width: | Height: | Size: 52 KiB |
After Width: | Height: | Size: 75 KiB |
After Width: | Height: | Size: 62 KiB |
After Width: | Height: | Size: 37 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 66 KiB |
After Width: | Height: | Size: 67 KiB |
After Width: | Height: | Size: 62 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 62 KiB |
After Width: | Height: | Size: 40 KiB |
After Width: | Height: | Size: 45 KiB |
After Width: | Height: | Size: 80 KiB |
After Width: | Height: | Size: 51 KiB |
After Width: | Height: | Size: 41 KiB |
After Width: | Height: | Size: 69 KiB |
After Width: | Height: | Size: 53 KiB |
After Width: | Height: | Size: 65 KiB |
After Width: | Height: | Size: 56 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 43 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 51 KiB |
After Width: | Height: | Size: 67 KiB |
After Width: | Height: | Size: 64 KiB |
After Width: | Height: | Size: 82 KiB |
After Width: | Height: | Size: 45 KiB |
After Width: | Height: | Size: 74 KiB |
After Width: | Height: | Size: 59 KiB |
After Width: | Height: | Size: 85 KiB |
After Width: | Height: | Size: 53 KiB |
After Width: | Height: | Size: 65 KiB |
After Width: | Height: | Size: 63 KiB |
After Width: | Height: | Size: 57 KiB |
After Width: | Height: | Size: 58 KiB |
After Width: | Height: | Size: 37 KiB |
After Width: | Height: | Size: 61 KiB |
After Width: | Height: | Size: 45 KiB |
After Width: | Height: | Size: 62 KiB |
After Width: | Height: | Size: 53 KiB |
After Width: | Height: | Size: 46 KiB |
After Width: | Height: | Size: 49 KiB |
After Width: | Height: | Size: 72 KiB |
After Width: | Height: | Size: 60 KiB |
After Width: | Height: | Size: 69 KiB |
After Width: | Height: | Size: 68 KiB |
After Width: | Height: | Size: 60 KiB |
After Width: | Height: | Size: 56 KiB |
After Width: | Height: | Size: 56 KiB |
After Width: | Height: | Size: 62 KiB |
After Width: | Height: | Size: 59 KiB |
After Width: | Height: | Size: 53 KiB |
After Width: | Height: | Size: 53 KiB |
After Width: | Height: | Size: 54 KiB |
After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 54 KiB |
After Width: | Height: | Size: 63 KiB |
After Width: | Height: | Size: 34 KiB |
After Width: | Height: | Size: 37 KiB |
After Width: | Height: | Size: 43 KiB |
After Width: | Height: | Size: 44 KiB |
After Width: | Height: | Size: 110 KiB |
After Width: | Height: | Size: 115 KiB |
After Width: | Height: | Size: 73 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 45 KiB |
After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 67 KiB |
After Width: | Height: | Size: 180 KiB |
After Width: | Height: | Size: 62 KiB |
After Width: | Height: | Size: 67 KiB |
After Width: | Height: | Size: 56 KiB |
After Width: | Height: | Size: 68 KiB |
After Width: | Height: | Size: 64 KiB |
After Width: | Height: | Size: 57 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 63 KiB |
After Width: | Height: | Size: 40 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 55 KiB |
After Width: | Height: | Size: 65 KiB |
After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 62 KiB |
After Width: | Height: | Size: 52 KiB |
After Width: | Height: | Size: 67 KiB |
After Width: | Height: | Size: 59 KiB |
After Width: | Height: | Size: 65 KiB |
After Width: | Height: | Size: 30 KiB |
After Width: | Height: | Size: 54 KiB |
After Width: | Height: | Size: 48 KiB |
After Width: | Height: | Size: 116 KiB |
After Width: | Height: | Size: 43 KiB |
After Width: | Height: | Size: 52 KiB |
After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 55 KiB |