diff --git a/README.md b/README.md index 4511580..8e9bd93 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,94 @@ All messages have a role which can either be "assistant" or "prompter". The role conversation threads from prompt to leaf node in a conversation tree are stricly alternating between "assistant" and "prompter". +### JSON Example: Message + +For readability the following JSON examples are shown formatted with indentation on multiple lines. +Objects are stored without indentation, each on a single line, in the actual jsonl files. + +``` +{ + "message_id": "218440fd-5317-4355-91dc-d001416df62b", + "parent_id": "13592dfb-a6f9-4748-a92c-32b34e239bb4", + "user_id": "8e95461f-5e94-4d8b-a2fb-d4717ce973e4", + "text": "It was the winter of 2035, and artificial intelligence (..)", + "role": "assistant", + "lang": "en", + "review_count": 3, + "review_result": true, + "deleted": false, + "rank": 0, + "synthetic": true, + "model_name": "oasst-sft-0_3000,max_new_tokens=400 (..)", + "labels": { + "spam": { "value": 0.0, "count": 3 }, + "lang_mismatch": { "value": 0.0, "count": 3 }, + "pii": { "value": 0.0, "count": 3 }, + "not_appropriate": { "value": 0.0, "count": 3 }, + "hate_speech": { "value": 0.0, "count": 3 }, + "sexual_content": { "value": 0.0, "count": 3 }, + "quality": { "value": 0.416, "count": 3 }, + "toxicity": { "value": 0.16, "count": 3 }, + "humor": { "value": 0.0, "count": 3 }, + "creativity": { "value": 0.33, "count": 3 }, + "violence": { "value": 0.16, "count": 3 } + } +} +``` + +### JSON Example: Conversation Tree + +For readability only a subset of properties are shown here. + +``` +{ + "message_tree_id": "14fbb664-a620-45ce-bee4-7c519b16a793", + "tree_state": "ready_for_export", + "prompt": { + "message_id": "14fbb664-a620-45ce-bee4-7c519b16a793", + "text": "Why can't we divide by 0? 
(..)", + "role": "prompter", + "lang": "en", + "replies": [ + { + "message_id": "894d30b6-56b4-4605-a504-89dd15d4d1c8", + "text": "The reason we cannot divide by zero is because (..)", + "role": "assistant", + "lang": "en", + "replies": [ + // ... + ] + }, + { + "message_id": "84d0913b-0fd9-4508-8ef5-205626a7039d", + "text": "The reason that the result of a division by zero is (..)", + "role": "assistant", + "lang": "en", + "replies": [ + { + "message_id": "3352725e-f424-4e3b-a627-b6db831bdbaa", + "text": "Math is confusing. Like those weird Irrational (..)", + "role": "prompter", + "lang": "en", + "replies": [ + { + "message_id": "f46207ca-3149-46e9-a466-9163d4ce499c", + "text": "Irrational numbers are simply numbers (..)", + "role": "assistant", + "lang": "en", + "replies": [] + }, + // ... + ] + } + ] + } + ] + } +} +``` + + Please refer to [oasst-data](https://github.com/LAION-AI/Open-Assistant/tree/main/oasst-data) for details about the data structure and python code to read and write jsonl files containing oasst objects.