Loading megatron/api_server.py +1 −1 Original line number Diff line number Diff line Loading @@ -61,4 +61,4 @@ class MegatronServer(object): api.add_resource(MegatronGenerate, '/generate', resource_class_args=[model]) def run(self, url): self.app.run(url, debug=False) self.app.run(url, threaded=False, debug=False) megatron/text_generation_utils.py +5 −0 Original line number Diff line number Diff line Loading @@ -162,6 +162,9 @@ def synced_generate(model, context_length_tensor, context_tokens_tensor, max_len def generate(model, sentences=None, max_len=0): if torch.distributed.get_rank() == 0: context_tokens_tensor, context_length_tensor = tokenize_batch(sentences) c = context_length_tensor[0] b = context_tokens_tensor.size(0) start = time.time() send_generate_info(context_tokens_tensor, context_length_tensor, max_len) else: context_length_tensor, context_tokens_tensor, max_len = receive_generate_info() Loading @@ -176,6 +179,8 @@ def generate(model, sentences=None, max_len=0): for i in range(decode_tokens.size(0)): decode_token = decode_tokens[i,:].cpu().numpy().tolist() resp_sentences.append(tokenizer.detokenize(decode_token)) end = time.time() print(str(b)+","+str(c)+","+str(decode_tokens.size(1))+","+str(end-start), flush=True) return resp_sentences def switch(val1, val2, boolean): Loading tools/run_cli.py +3 −1 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import json import sys import urllib2 class PutRequest(urllib2.Request): '''class to handling putting with urllib2''' Loading @@ -21,11 +22,12 @@ class PutRequest(urllib2.Request): return 'PUT' if __name__ == "__main__": url = sys.argv[1] while True: sentence = raw_input("Enter prompt: ") max_len = int(input("Enter number tokens output: ")) data = json.dumps({"sentences": [sentence], "max_len":max_len}) req = PutRequest("http://sc-sdgx2-484:5000/generate", data, {'Content-Type': 'application/json'}) req = PutRequest(url, data, {'Content-Type': 'application/json'}) response = urllib2.urlopen(req) resp_sentences = json.load(response) print("Megatron Response: ") Loading Loading
megatron/api_server.py +1 −1 Original line number Diff line number Diff line Loading @@ -61,4 +61,4 @@ class MegatronServer(object): api.add_resource(MegatronGenerate, '/generate', resource_class_args=[model]) def run(self, url): self.app.run(url, debug=False) self.app.run(url, threaded=False, debug=False)
megatron/text_generation_utils.py +5 −0 Original line number Diff line number Diff line Loading @@ -162,6 +162,9 @@ def synced_generate(model, context_length_tensor, context_tokens_tensor, max_len def generate(model, sentences=None, max_len=0): if torch.distributed.get_rank() == 0: context_tokens_tensor, context_length_tensor = tokenize_batch(sentences) c = context_length_tensor[0] b = context_tokens_tensor.size(0) start = time.time() send_generate_info(context_tokens_tensor, context_length_tensor, max_len) else: context_length_tensor, context_tokens_tensor, max_len = receive_generate_info() Loading @@ -176,6 +179,8 @@ def generate(model, sentences=None, max_len=0): for i in range(decode_tokens.size(0)): decode_token = decode_tokens[i,:].cpu().numpy().tolist() resp_sentences.append(tokenizer.detokenize(decode_token)) end = time.time() print(str(b)+","+str(c)+","+str(decode_tokens.size(1))+","+str(end-start), flush=True) return resp_sentences def switch(val1, val2, boolean): Loading
tools/run_cli.py +3 −1 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import json import sys import urllib2 class PutRequest(urllib2.Request): '''class to handling putting with urllib2''' Loading @@ -21,11 +22,12 @@ class PutRequest(urllib2.Request): return 'PUT' if __name__ == "__main__": url = sys.argv[1] while True: sentence = raw_input("Enter prompt: ") max_len = int(input("Enter number tokens output: ")) data = json.dumps({"sentences": [sentence], "max_len":max_len}) req = PutRequest("http://sc-sdgx2-484:5000/generate", data, {'Content-Type': 'application/json'}) req = PutRequest(url, data, {'Content-Type': 'application/json'}) response = urllib2.urlopen(req) resp_sentences = json.load(response) print("Megatron Response: ") Loading