- API docs
- CLI
- Integration guides
- Blog
- How machines learn to understand words: a guide to embeddings in NLP
- Prompt-based learning with Transformers
- Efficient Transformers II: knowledge distillation & fine-tuning
- Efficient Transformers I: attention mechanisms
- Deep hierarchical unsupervised intent modelling: getting value without training data
- Fixing annotating bias with Communications Mining
- Active learning: better ML models in less time
- It's all in the numbers - assessing model performance with metrics
- Why model validation is important
- Comparing Communications Mining and Google AutoML for conversational data intelligence
Streams (legacy)
/api/v1/datasets/<project>/<dataset_name>/streams
/api/v1/datasets/<project>/<dataset_name>/streams
Permissions required: Streams admin, View labels.
- Bash
curl -X PUT 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "stream": { "comment_filter": { "user_properties": { "number:Spend": { "maximum": 100000, "minimum": 100 }, "number:Transactions": { "one_of": [ 1 ] }, "string:Country": { "one_of": [ "uk", "de" ] } } }, "description": "Used by ACME RPA to create tickets for disputes.", "model": { "label_thresholds": [ { "name": [ "Some Label" ], "threshold": 0.37 }, { "name": [ "Another Label" ], "threshold": 0.46 }, { "name": [ "Parent Label", "Child Label" ], "threshold": 0.41 } ], "version": 8 }, "name": "dispute", "title": "Collateral Disputes" } }'
curl -X PUT 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "stream": { "comment_filter": { "user_properties": { "number:Spend": { "maximum": 100000, "minimum": 100 }, "number:Transactions": { "one_of": [ 1 ] }, "string:Country": { "one_of": [ "uk", "de" ] } } }, "description": "Used by ACME RPA to create tickets for disputes.", "model": { "label_thresholds": [ { "name": [ "Some Label" ], "threshold": 0.37 }, { "name": [ "Another Label" ], "threshold": 0.46 }, { "name": [ "Parent Label", "Child Label" ], "threshold": 0.41 } ], "version": 8 }, "name": "dispute", "title": "Collateral Disputes" } }' - Node
const request = require("request"); request.put( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { stream: { comment_filter: { user_properties: { "number:Spend": { maximum: 100000, minimum: 100 }, "number:Transactions": { one_of: [1] }, "string:Country": { one_of: ["uk", "de"] }, }, }, description: "Used by ACME RPA to create tickets for disputes.", model: { label_thresholds: [ { name: ["Some Label"], threshold: 0.37 }, { name: ["Another Label"], threshold: 0.46 }, { name: ["Parent Label", "Child Label"], threshold: 0.41 }, ], version: 8, }, name: "dispute", title: "Collateral Disputes", }, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.put( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { stream: { comment_filter: { user_properties: { "number:Spend": { maximum: 100000, minimum: 100 }, "number:Transactions": { one_of: [1] }, "string:Country": { one_of: ["uk", "de"] }, }, }, description: "Used by ACME RPA to create tickets for disputes.", model: { label_thresholds: [ { name: ["Some Label"], threshold: 0.37 }, { name: ["Another Label"], threshold: 0.46 }, { name: ["Parent Label", "Child Label"], threshold: 0.41 }, ], version: 8, }, name: "dispute", title: "Collateral Disputes", }, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.put( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "stream": { "name": "dispute", "title": "Collateral Disputes", "description": "Used by ACME RPA to create tickets for disputes.", "model": { "version": 8, "label_thresholds": [ {"name": ["Some Label"], "threshold": 0.37}, {"name": ["Another Label"], "threshold": 0.46}, { "name": ["Parent Label", "Child Label"], "threshold": 0.41, }, ], }, "comment_filter": { "user_properties": { "string:Country": {"one_of": ["uk", "de"]}, "number:Spend": {"minimum": 100, "maximum": 100000}, "number:Transactions": {"one_of": [1]}, } }, } }, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.put( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "stream": { "name": "dispute", "title": "Collateral Disputes", "description": "Used by ACME RPA to create tickets for disputes.", "model": { "version": 8, "label_thresholds": [ {"name": ["Some Label"], "threshold": 0.37}, {"name": ["Another Label"], "threshold": 0.46}, { "name": ["Parent Label", "Child Label"], "threshold": 0.41, }, ], }, "comment_filter": { "user_properties": { "string:Country": {"one_of": ["uk", "de"]}, "number:Spend": {"minimum": 100, "maximum": 100000}, "number:Transactions": {"one_of": [1]}, } }, } }, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "status": "ok", "stream": { "context": "0", "created_at": "2019-08-03T12:30:00.123456Z", "dataset_id": "abcdef0123456789", "description": "Used by ACME RPA to create tickets for disputes.", "id": "0123456789abcdef", "model": { "version": 8 }, "name": "dispute", "title": "Collateral Disputes", "updated_at": "2019-08-03T12:30:00.123456Z" } }
{ "status": "ok", "stream": { "context": "0", "created_at": "2019-08-03T12:30:00.123456Z", "dataset_id": "abcdef0123456789", "description": "Used by ACME RPA to create tickets for disputes.", "id": "0123456789abcdef", "model": { "version": 8 }, "name": "dispute", "title": "Collateral Disputes", "updated_at": "2019-08-03T12:30:00.123456Z" } }
Streams enable persistent, stateful iteration through comments in a dataset, with predicted labels and general fields computed using a pinned model.
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
name | string | yes | API name for the stream, used in URLs. Must be unique within a dataset and must match `[A-Za-z0-9-_]{1,256}`. |
title | string | no | One-line human-readable title for the stream. |
description | string | no | A longer description of the stream. |
model | Model | no | If specified, comments fetched from this stream will contain predictions from a pinned model. |
comment_filter | CommentFilter | no | If specified, comments not matching the filter will not be returned. See here for details on how the comment filter will affect the results returned by the stream. |
`Model` has the following format:
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
version | integer | yes | A model version that has been pinned via the Models page. |
label_thresholds | array<LabelThreshold> | no | If set, only values matching the given `label_thresholds` are returned. If not set, all labels and all prediction values will be returned. |
`LabelThreshold` has the following format:
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
name | array<string> | yes | The name of the label to be returned, formatted as a list of hierarchical labels. For instance, the label "Some Label" will have the format `["Some Label"]`, and the label "Parent Label > Child Label" will have the format `["Parent Label", "Child Label"]`. |
threshold | number | yes | The confidence threshold to use for the label (a number between 0.0 and 1.0). The label will only be returned for a comment if its prediction is above this threshold. |
`CommentFilter` has the following format:
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
user_properties | UserPropertyFilter | no | A filter that applies to the user properties of a comment. For more on user properties, see the Comment Reference. |
`UserPropertyFilter` is a map of user property name to filter. String properties may be filtered to values in a set (`{"one_of": ["val_1", "val_2"]}`). Number properties may be filtered either to values in a set (`{"one_of": [123, 456]}`) or to a range (`{"minimum": 123, "maximum": 456}`).
/api/v1/datasets/<project>/<dataset_name>/streams
/api/v1/datasets/<project>/<dataset_name>/streams
- Bash
curl -X PUT 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "stream": { "comment_filter": { "user_properties": { "number:Spend": { "maximum": 100000, "minimum": 100 }, "number:Transactions": { "one_of": [ 1 ] }, "string:Country": { "one_of": [ "uk", "de" ] } } }, "description": "Used by ACME RPA to create tickets for disputes.", "model": { "label_thresholds": [ { "name": [ "Some Label" ], "threshold": 0.37 }, { "name": [ "Another Label" ], "threshold": 0.46 }, { "name": [ "Parent Label", "Child Label" ], "threshold": 0.41 } ], "version": 8 }, "name": "dispute", "title": "Collateral Disputes" } }'
curl -X PUT 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "stream": { "comment_filter": { "user_properties": { "number:Spend": { "maximum": 100000, "minimum": 100 }, "number:Transactions": { "one_of": [ 1 ] }, "string:Country": { "one_of": [ "uk", "de" ] } } }, "description": "Used by ACME RPA to create tickets for disputes.", "model": { "label_thresholds": [ { "name": [ "Some Label" ], "threshold": 0.37 }, { "name": [ "Another Label" ], "threshold": 0.46 }, { "name": [ "Parent Label", "Child Label" ], "threshold": 0.41 } ], "version": 8 }, "name": "dispute", "title": "Collateral Disputes" } }' - Node
const request = require("request"); request.put( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { stream: { comment_filter: { user_properties: { "number:Spend": { maximum: 100000, minimum: 100 }, "number:Transactions": { one_of: [1] }, "string:Country": { one_of: ["uk", "de"] }, }, }, description: "Used by ACME RPA to create tickets for disputes.", model: { label_thresholds: [ { name: ["Some Label"], threshold: 0.37 }, { name: ["Another Label"], threshold: 0.46 }, { name: ["Parent Label", "Child Label"], threshold: 0.41 }, ], version: 8, }, name: "dispute", title: "Collateral Disputes", }, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.put( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { stream: { comment_filter: { user_properties: { "number:Spend": { maximum: 100000, minimum: 100 }, "number:Transactions": { one_of: [1] }, "string:Country": { one_of: ["uk", "de"] }, }, }, description: "Used by ACME RPA to create tickets for disputes.", model: { label_thresholds: [ { name: ["Some Label"], threshold: 0.37 }, { name: ["Another Label"], threshold: 0.46 }, { name: ["Parent Label", "Child Label"], threshold: 0.41 }, ], version: 8, }, name: "dispute", title: "Collateral Disputes", }, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.put( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "stream": { "name": "dispute", "title": "Collateral Disputes", "description": "Used by ACME RPA to create tickets for disputes.", "model": { "version": 8, "label_thresholds": [ {"name": ["Some Label"], "threshold": 0.37}, {"name": ["Another Label"], "threshold": 0.46}, { "name": ["Parent Label", "Child Label"], "threshold": 0.41, }, ], }, "comment_filter": { "user_properties": { "string:Country": {"one_of": ["uk", "de"]}, "number:Spend": {"minimum": 100, "maximum": 100000}, "number:Transactions": {"one_of": [1]}, } }, } }, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.put( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "stream": { "name": "dispute", "title": "Collateral Disputes", "description": "Used by ACME RPA to create tickets for disputes.", "model": { "version": 8, "label_thresholds": [ {"name": ["Some Label"], "threshold": 0.37}, {"name": ["Another Label"], "threshold": 0.46}, { "name": ["Parent Label", "Child Label"], "threshold": 0.41, }, ], }, "comment_filter": { "user_properties": { "string:Country": {"one_of": ["uk", "de"]}, "number:Spend": {"minimum": 100, "maximum": 100000}, "number:Transactions": {"one_of": [1]}, } }, } }, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "status": "ok", "stream": { "context": "0", "created_at": "2019-08-03T12:30:00.123456Z", "dataset_id": "abcdef0123456789", "description": "Used by ACME RPA to create tickets for disputes.", "id": "0123456789abcdef", "model": { "version": 8 }, "name": "dispute", "title": "Collateral Disputes", "updated_at": "2019-08-03T12:30:00.123456Z" } }
{ "status": "ok", "stream": { "context": "0", "created_at": "2019-08-03T12:30:00.123456Z", "dataset_id": "abcdef0123456789", "description": "Used by ACME RPA to create tickets for disputes.", "id": "0123456789abcdef", "model": { "version": 8 }, "name": "dispute", "title": "Collateral Disputes", "updated_at": "2019-08-03T12:30:00.123456Z" } }
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>
- Bash
curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute' \ -H "Authorization: Bearer $REINFER_TOKEN"
curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute' \ -H "Authorization: Bearer $REINFER_TOKEN" - Node
const request = require("request"); request.get( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.get( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.get( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.get( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "status": "ok", "stream": { "context": "0", "created_at": "2019-08-03T12:30:00.123456Z", "dataset_id": "abcdef0123456789", "description": "Used by ACME RPA to create tickets for disputes.", "id": "0123456789abcdef", "model": { "version": 8 }, "name": "dispute", "title": "Collateral Disputes", "updated_at": "2019-08-03T12:30:00.123456Z" } }
{ "status": "ok", "stream": { "context": "0", "created_at": "2019-08-03T12:30:00.123456Z", "dataset_id": "abcdef0123456789", "description": "Used by ACME RPA to create tickets for disputes.", "id": "0123456789abcdef", "model": { "version": 8 }, "name": "dispute", "title": "Collateral Disputes", "updated_at": "2019-08-03T12:30:00.123456Z" } }
/api/v1/datasets/<project>/<dataset_name>/streams
/api/v1/datasets/<project>/<dataset_name>/streams
- Bash
curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams' \ -H "Authorization: Bearer $REINFER_TOKEN"
curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams' \ -H "Authorization: Bearer $REINFER_TOKEN" - Node
const request = require("request"); request.get( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.get( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.get( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.get( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "status": "ok", "stream": { "context": "0", "created_at": "2019-08-03T12:30:00.123456Z", "dataset_id": "abcdef0123456789", "description": "Used by ACME RPA to create tickets for disputes.", "id": "0123456789abcdef", "model": { "version": 8 }, "name": "dispute", "title": "Collateral Disputes", "updated_at": "2019-08-03T12:30:00.123456Z" } }
{ "status": "ok", "stream": { "context": "0", "created_at": "2019-08-03T12:30:00.123456Z", "dataset_id": "abcdef0123456789", "description": "Used by ACME RPA to create tickets for disputes.", "id": "0123456789abcdef", "model": { "version": 8 }, "name": "dispute", "title": "Collateral Disputes", "updated_at": "2019-08-03T12:30:00.123456Z" } }
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>
- Bash
curl -X DELETE 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute' \ -H "Authorization: Bearer $REINFER_TOKEN"
curl -X DELETE 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute' \ -H "Authorization: Bearer $REINFER_TOKEN" - Node
const request = require("request"); request.delete( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.delete( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.delete( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.delete( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "status": "ok" }
{ "status": "ok" }
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>/fetch
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>/fetch
- Bash
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/fetch' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "size": 8 }'
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/fetch' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "size": 8 }' - Node
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/fetch", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { size: 8 }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/fetch", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { size: 8 }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/fetch", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={"size": 8}, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/fetch", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={"size": 8}, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "filtered": 6, "is_end_sequence": false, "results": [ { "comment": { "context": "1", "created_at": "2018-10-15T15:39:51.815000Z", "id": "0123456789abcdef", "last_modified": "2018-10-15T15:39:51.815000Z", "messages": [ { "body": { "text": "Hi Bob,\n\nCould you send me today's figures?" }, "from": "alice@company.com", "sent_at": "2011-12-11T11:02:03.000000+00:00", "signature": { "text": "Thanks,\nAlice" }, "subject": { "text": "Today's figures" }, "to": ["bob@organisation.org"] } ], "source_id": "18ba5ce699f8da1f", "text_format": "plain", "thread_id": "3c314542414538353242393446393", "timestamp": "2011-12-11T01:02:03.000000+00:00", "uid": "18ba5ce699f8da1f.0123456789abcdef", "user_properties": { "number:Participants": 2, "number:Position in Thread": 1, "number:Recipients": 1, "string:Folder": "Sent (/ Sent)", "string:Has Signature": "Yes", "string:Message ID": "<abcdef@abc.company.com>", "string:Sender": "alice@company.com", "string:Sender Domain": "company.com", "string:Thread": "<abcdef@abc.company.com>" } }, "entities": [], "labels": [], "sequence_id": "qs8QcHIBAACuYzDeit-pwQdWGYGQImdy" }, { "comment": { "context": "1", "created_at": "2018-10-15T18:39:51.815000Z", "id": "abcdef0123456789", "last_modified": "2018-10-15T18:39:51.815000Z", "messages": [ { "body": { "text": "Alice,\n\nHere are the figures for today." 
}, "from": "bob@organisation.org", "sent_at": "2011-12-11T11:02:03.000000+00:00", "signature": { "text": "Regards,\nBob" }, "subject": { "text": "RE: Today's figures" }, "to": ["alice@company.com"] } ], "source_id": "18ba5ce699f8da1f", "text_format": "plain", "thread_id": "3c314542414538353242393446393", "timestamp": "2011-12-11T02:02:03.000000+00:00", "uid": "18ba5ce699f8da1f.abcdef0123456789", "user_properties": { "number:Participants": 3, "number:Position in Thread": 2, "number:Recipients": 2, "string:Folder": "Inbox (/ Inbox)", "string:Has Signature": "No", "string:Message ID": "def@xyz.project.com", "string:Sender": "bob@organisation.org", "string:Sender Domain": "organisation.org", "string:Thread": "<abcdef@abc.company.com>" } }, "entities": [], "labels": [ { "name": ["Some Top-Level Label"], "probability": 0.8374786376953125 }, { "name": ["Another Top-Level Label", "Child Label"], "probability": 0.6164003014564514 } ], "sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR" } ], "sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR", "status": "ok" }
{ "filtered": 6, "is_end_sequence": false, "results": [ { "comment": { "context": "1", "created_at": "2018-10-15T15:39:51.815000Z", "id": "0123456789abcdef", "last_modified": "2018-10-15T15:39:51.815000Z", "messages": [ { "body": { "text": "Hi Bob,\n\nCould you send me today's figures?" }, "from": "alice@company.com", "sent_at": "2011-12-11T11:02:03.000000+00:00", "signature": { "text": "Thanks,\nAlice" }, "subject": { "text": "Today's figures" }, "to": ["bob@organisation.org"] } ], "source_id": "18ba5ce699f8da1f", "text_format": "plain", "thread_id": "3c314542414538353242393446393", "timestamp": "2011-12-11T01:02:03.000000+00:00", "uid": "18ba5ce699f8da1f.0123456789abcdef", "user_properties": { "number:Participants": 2, "number:Position in Thread": 1, "number:Recipients": 1, "string:Folder": "Sent (/ Sent)", "string:Has Signature": "Yes", "string:Message ID": "<abcdef@abc.company.com>", "string:Sender": "alice@company.com", "string:Sender Domain": "company.com", "string:Thread": "<abcdef@abc.company.com>" } }, "entities": [], "labels": [], "sequence_id": "qs8QcHIBAACuYzDeit-pwQdWGYGQImdy" }, { "comment": { "context": "1", "created_at": "2018-10-15T18:39:51.815000Z", "id": "abcdef0123456789", "last_modified": "2018-10-15T18:39:51.815000Z", "messages": [ { "body": { "text": "Alice,\n\nHere are the figures for today." 
}, "from": "bob@organisation.org", "sent_at": "2011-12-11T11:02:03.000000+00:00", "signature": { "text": "Regards,\nBob" }, "subject": { "text": "RE: Today's figures" }, "to": ["alice@company.com"] } ], "source_id": "18ba5ce699f8da1f", "text_format": "plain", "thread_id": "3c314542414538353242393446393", "timestamp": "2011-12-11T02:02:03.000000+00:00", "uid": "18ba5ce699f8da1f.abcdef0123456789", "user_properties": { "number:Participants": 3, "number:Position in Thread": 2, "number:Recipients": 2, "string:Folder": "Inbox (/ Inbox)", "string:Has Signature": "No", "string:Message ID": "def@xyz.project.com", "string:Sender": "bob@organisation.org", "string:Sender Domain": "organisation.org", "string:Thread": "<abcdef@abc.company.com>" } }, "entities": [], "labels": [ { "name": ["Some Top-Level Label"], "probability": 0.8374786376953125 }, { "name": ["Another Top-Level Label", "Child Label"], "probability": 0.6164003014564514 } ], "sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR" } ], "sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR", "status": "ok" }
Once a stream is created, it can be queried to fetch comments and their predicted labels and general fields. Below are some important aspects to keep in mind when fetching comments from a stream.
Comment Queue
When a stream is created, its initial position is set to be equal to its creation time. If needed, you can set the stream to a different position (either forwards or backwards in time) using the reset endpoint. The stream will return comments starting from its current position. The position of the comment in the comment queue is determined by the order in which the comments were uploaded.
Advancing Your Position in the Queue
Since the stream will always return comments starting from its current position, it should be advanced to the next position after each fetch request by using the advance endpoint. This way the API guarantees at-least-once processing of all comments - if your application fails while processing a batch, it will pick up the same batch on restart. (Note that since an application can successfully process a comment but fail at the advance step, it is important to handle seeing a comment multiple times.)
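Depending on your application, you can choose between advancing the stream once for the whole batch (using the batch's `sequence_id` contained in the response), or advancing it for each individual comment (using the comment's `sequence_id` contained in the response).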
Comment Filter
If a `comment_filter` was specified when creating the stream, comments not matching the filter will not be included in the results, but will still count towards the requested `size`, so you may see responses where all of `size` comments are filtered out, leading to an empty `results` array. In the example below, we request a batch of 8 comments, all of which are filtered out.
{
"filtered": 8,
"results": [],
"sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR",
"status": "ok"
}
{
"filtered": 8,
"results": [],
"sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR",
"status": "ok"
}
To avoid this, you can use the `max_filtered` parameter, which prevents filtered comments from counting towards the requested `size`.
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
size | number | yes | The number of comments to fetch for this stream. Will return fewer if it reaches end of batch or if comments are filtered out according to the comment filter. Max value is 1024. |
max_filtered | number | no | Convenience parameter for streams with a comment filter. When provided, up to `max_filtered` filtered comments will not count towards the requested `size`. This is useful if you expect a large number of comments to not match the filter. Has no effect on streams without a comment filter. Max value is 1024. |
NAME | TYPE | DESCRIPTION |
---|---|---|
status | string | `ok` if the request is successful, or `error` in case of an error. See the Overview to learn more about error responses. |
filtered | number | Number of comments that were filtered out according to a comment filter. If the stream was created without a filter, this number will always be `0`. |
sequence_id | string | The batch sequence ID. Used to acknowledge processing of this batch and advance stream to the next batch. |
is_end_sequence | bool | True if there were no additional results in the stream at the time the request was made. False otherwise. |
results | array<Result> | An array containing result objects. |
`Result` has the following format:
NAME | TYPE | DESCRIPTION |
---|---|---|
comment | Comment | Comment data. For a detailed explanation, see the Comment Reference. |
sequence_id | string | The comment's sequence ID. Used to acknowledge processing of this comment and advance stream to the next comment. |
labels | array<Label> | An array containing predicted labels for this comment, where `Label` has the format described here. |
entities | array<Entity> | An array containing predicted entities for this comment, where `Entity` has a format described here. |
label_properties | array<LabelProperty> | An array containing predicted label properties for this comment, where `LabelProperty` has a format described here. |
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>/advance
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>/advance
Permissions required: Consume streams, View labels.
- Bash
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/advance' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR" }'
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/advance' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR" }' - Node
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/advance", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { sequence_id: "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR" }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/advance", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { sequence_id: "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR" }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/advance", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={"sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR"}, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/advance", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={"sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR"}, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR", "status": "ok" }
{ "sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR", "status": "ok" }
sequence_id
which represents the position it has fetched up to.
Passing that same sequence_id to the advance API ensures that the next fetch performed on the stream starts from this position.
You can advance to the next batch by using the current batch's sequence_id. Alternatively, you can advance to the next comment by using the current comment's sequence_id.
Since an application can successfully process a comment but fail at the advance step, it is important to handle seeing a comment multiple times on the client application side.
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
sequence_id | string | yes | The sequence ID to advance the stream to. |
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>/reset
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>/reset
Permissions required: Consume streams, View labels.
- Bash
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/reset' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "to_comment_created_at": "2020-06-03T16:05:00" }'
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/reset' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "to_comment_created_at": "2020-06-03T16:05:00" }' - Node
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/reset", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { to_comment_created_at: "2020-06-03T16:05:00" }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/reset", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { to_comment_created_at: "2020-06-03T16:05:00" }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/reset", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={"to_comment_created_at": "2020-06-03T16:05:00"}, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/reset", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={"to_comment_created_at": "2020-06-03T16:05:00"}, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "sequence_id": "4LvtenIBAAA=", "status": "ok", "stream": { "created_at": "2019-08-03T12:30:00.123456Z", "dataset_id": "abcdef0123456789", "description": "Used by ACME RPA to create tickets for disputes.", "id": "0123456789abcdef", "model": { "version": 8 }, "name": "dispute", "title": "Collateral Disputes", "updated_at": "2019-08-03T12:30:00.123456Z" } }
{ "sequence_id": "4LvtenIBAAA=", "status": "ok", "stream": { "created_at": "2019-08-03T12:30:00.123456Z", "dataset_id": "abcdef0123456789", "description": "Used by ACME RPA to create tickets for disputes.", "id": "0123456789abcdef", "model": { "version": 8 }, "name": "dispute", "title": "Collateral Disputes", "updated_at": "2019-08-03T12:30:00.123456Z" } }
created_at
property, rather than its timestamp
property).
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
to_comment_created_at | string | yes | An ISO-8601 timestamp. |
sequence_id
corresponding to the new
stream position.
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>/exceptions
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>/exceptions
- Bash
curl -X PUT 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/exceptions' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "exceptions": [ { "metadata": { "type": "No Prediction" }, "uid": "18ba5ce699f8da1f.abcdef0123456789" }, { "metadata": { "type": "Wrong Prediction" }, "uid": "18ba5ce699f8da1f.0123456789abcdef" } ] }'
curl -X PUT 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/exceptions' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "exceptions": [ { "metadata": { "type": "No Prediction" }, "uid": "18ba5ce699f8da1f.abcdef0123456789" }, { "metadata": { "type": "Wrong Prediction" }, "uid": "18ba5ce699f8da1f.0123456789abcdef" } ] }' - Node
const request = require("request"); request.put( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/exceptions", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { exceptions: [ { metadata: { type: "No Prediction" }, uid: "18ba5ce699f8da1f.abcdef0123456789", }, { metadata: { type: "Wrong Prediction" }, uid: "18ba5ce699f8da1f.0123456789abcdef", }, ], }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.put( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/exceptions", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { exceptions: [ { metadata: { type: "No Prediction" }, uid: "18ba5ce699f8da1f.abcdef0123456789", }, { metadata: { type: "Wrong Prediction" }, uid: "18ba5ce699f8da1f.0123456789abcdef", }, ], }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.put( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/exceptions", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "exceptions": [ { "uid": "18ba5ce699f8da1f.abcdef0123456789", "metadata": {"type": "No Prediction"}, }, { "uid": "18ba5ce699f8da1f.0123456789abcdef", "metadata": {"type": "Wrong Prediction"}, }, ] }, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.put( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/exceptions", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "exceptions": [ { "uid": "18ba5ce699f8da1f.abcdef0123456789", "metadata": {"type": "No Prediction"}, }, { "uid": "18ba5ce699f8da1f.0123456789abcdef", "metadata": {"type": "Wrong Prediction"}, }, ] }, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "status": "ok" }
{ "status": "ok" }
This endpoint allows you to tag comments as exceptions in the platform, so that a model trainer can review and label them in order to improve the model. We recommend tagging comments for which the model returned no predictions, as well as comments for which the model returned wrong predictions. (For help with designing the exception handling flow, please check the Integration Guide).
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
exceptions | array<Exception> | yes | A list of exceptions. |
Exception
has the following format:
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
uid | string | yes | The uid of the comment that should be tagged as an exception. |
metadata | Metadata | yes | An object containing exception metadata. |
Metadata
has the following format:
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
type | string | yes | The exception type. It will be available as a filter property in the Communications Mining UI. The value can be an arbitrary string; please choose a short, easy-to-understand string such as "No Prediction" or "Wrong Prediction". |
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>/exceptions?uid=<comment_uid0>[&uid=<comment_uid1>...]
/api/v1/datasets/<project>/<dataset_name>/streams/<stream_name>/exceptions?uid=<comment_uid0>[&uid=<comment_uid1>...]
- Bash
curl -X DELETE 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/exceptions?uid=18ba5ce699f8da1f.abcdef0123456789&uid=18ba5ce699f8da1f.0123456789abcdef' \ -H "Authorization: Bearer $REINFER_TOKEN"
curl -X DELETE 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/exceptions?uid=18ba5ce699f8da1f.abcdef0123456789&uid=18ba5ce699f8da1f.0123456789abcdef' \ -H "Authorization: Bearer $REINFER_TOKEN" - Node
const request = require("request"); request.delete( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/exceptions?uid=18ba5ce699f8da1f.abcdef0123456789&uid=18ba5ce699f8da1f.0123456789abcdef", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.delete( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/exceptions?uid=18ba5ce699f8da1f.abcdef0123456789&uid=18ba5ce699f8da1f.0123456789abcdef", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.delete( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/exceptions?uid=18ba5ce699f8da1f.abcdef0123456789&uid=18ba5ce699f8da1f.0123456789abcdef", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.delete( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/exceptions?uid=18ba5ce699f8da1f.abcdef0123456789&uid=18ba5ce699f8da1f.0123456789abcdef", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "status": "ok" }
{ "status": "ok" }
Exceptions can be untagged using the comment UID.