Commit af1c2e56 authored by Lucas Prates
Browse files

[ARM] Fix dropped dollar sign from symbols in branch targets

Summary:
ARMAsmParser was incorrectly dropping a leading dollar sign character
from symbol names in targets of branch instructions. This was caused by
an incorrect assumption that the contents following the dollar sign
token should be handled as a constant immediate, similarly to the #
token.

This patch prevents operand parsing from consuming the dollar sign
token when it is followed by an identifier, making sure it is properly
parsed as part of the expression.

Reviewers: efriedma

Reviewed By: efriedma

Subscribers: danielkiss, chill, carwil, vhscampos, kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73176
parent 2eff1c3c
Loading
Loading
Loading
Loading
+21 −6
Original line number Diff line number Diff line
@@ -6119,20 +6119,35 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
  case AsmToken::LCurly:
    return parseRegisterList(Operands, !Mnemonic.startswith("clr"));
  case AsmToken::Dollar:
  case AsmToken::Hash:
    // #42 -> immediate.
  case AsmToken::Hash: {
    // #42 -> immediate
    // $ 42 -> immediate
    // $foo -> symbol name
    // $42 -> symbol name
    S = Parser.getTok().getLoc();

    // Favor the interpretation of $-prefixed operands as symbol names.
    // Cases where immediates are explicitly expected are handled by their
    // specific ParseMethod implementations.
    auto AdjacentToken = getLexer().peekTok(/*ShouldSkipSpace=*/false);
    bool ExpectIdentifier = Parser.getTok().is(AsmToken::Dollar) &&
                            (AdjacentToken.is(AsmToken::Identifier) ||
                             AdjacentToken.is(AsmToken::Integer));
    if (!ExpectIdentifier) {
      // Token is not part of identifier. Drop leading $ or # before parsing
      // expression.
      Parser.Lex();
    }

    if (Parser.getTok().isNot(AsmToken::Colon)) {
      bool isNegative = Parser.getTok().is(AsmToken::Minus);
      bool IsNegative = Parser.getTok().is(AsmToken::Minus);
      const MCExpr *ImmVal;
      if (getParser().parseExpression(ImmVal))
        return true;
      const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
      if (CE) {
        int32_t Val = CE->getValue();
        if (isNegative && Val == 0)
        if (IsNegative && Val == 0)
          ImmVal = MCConstantExpr::create(std::numeric_limits<int32_t>::min(),
                                          getContext());
      }
@@ -6151,7 +6166,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
    }
    // w/ a ':' after the '#', it's just like a plain ':'.
    LLVM_FALLTHROUGH;

  }
  case AsmToken::Colon: {
    S = Parser.getTok().getLoc();
    // ":lower16:" and ":upper16:" expression prefixes
+29 −0
Original line number Diff line number Diff line
@@ -13,3 +13,32 @@
@ CHECK: bl	#4                      @ encoding: [0x01,0x00,0x00,0xeb]
@ CHECK: beq	#4                      @ encoding: [0x01,0x00,0x00,0x0a]
@ CHECK: blx	#2                      @ encoding: [0x00,0x00,0x00,0xfb]

@------------------------------------------------------------------------------
@ Leading '$' on branch targets must not be dropped if part of symbol names
@------------------------------------------------------------------------------

        .global $foo
        b $foo
        bl $foo
        beq $foo
        blx $foo
        b $foo + 4

@ CHECK: b      ($foo)                      @ encoding: [A,A,A,0xea]
@ CHECK: bl     ($foo)                      @ encoding: [A,A,A,0xeb]
@ CHECK: beq    ($foo)                      @ encoding: [A,A,A,0x0a]
@ CHECK: blx    ($foo)                      @ encoding: [A,A,A,0xfa]
@ CHECK: b      #($foo)+4                   @ encoding: [A,A,A,0xea]

@------------------------------------------------------------------------------
@ Leading '$' should be allowed to introduce an expression
@------------------------------------------------------------------------------

        .global bar
        b $ 4
        bl $ bar + 4
        blx $ bar
@ CHECK: b	    #4                        @ encoding: [0x01,0x00,0x00,0xea]
@ CHECK: bl     #bar+4                    @ encoding: [A,A,A,0xeb]
@ CHECK: blx    bar                       @ encoding: [A,A,A,0xfa]