From cf099d11218cb6f6c5cce947d6738e347f07fb12 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sun, 20 Feb 2011 12:57:14 +0000 Subject: Vendor import of llvm trunk r126079: http://llvm.org/svn/llvm-project/llvm/trunk@126079 --- docs/tutorial/LangImpl3.html | 6 +- docs/tutorial/LangImpl4.html | 7 +- docs/tutorial/LangImpl5.html | 5 +- docs/tutorial/LangImpl6.html | 5 +- docs/tutorial/LangImpl7.html | 5 +- docs/tutorial/OCamlLangImpl7.html | 4 +- docs/tutorial/OCamlLangImpl8.html | 365 ++++++++++++++++++++++++++++++++++++++ docs/tutorial/index.html | 2 +- 8 files changed, 389 insertions(+), 10 deletions(-) create mode 100644 docs/tutorial/OCamlLangImpl8.html (limited to 'docs/tutorial') diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html index 6cd33b010adc..a320ff7e9064 100644 --- a/docs/tutorial/LangImpl3.html +++ b/docs/tutorial/LangImpl3.html @@ -353,8 +353,8 @@ above.

The Module symbol table works just like the Function symbol table when it -comes to name conflicts: if a new function is created with a name was previously -added to the symbol table, it will get implicitly renamed when added to the +comes to name conflicts: if a new function is created with a name that was previously +added to the symbol table, the new function will get implicitly renamed when added to the Module. The code above exploits this fact to determine if there was a previous definition of this function.

@@ -1263,7 +1263,7 @@ int main() { Chris Lattner
The LLVM Compiler Infrastructure
- Last modified: $Date: 2010-09-01 22:09:20 +0200 (Wed, 01 Sep 2010) $ + Last modified: $Date: 2011-02-15 01:24:32 +0100 (Tue, 15 Feb 2011) $ diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html index d286364d2a56..a2511d959e7b 100644 --- a/docs/tutorial/LangImpl4.html +++ b/docs/tutorial/LangImpl4.html @@ -176,6 +176,8 @@ add a set of optimizations to run. The code looks like this:

// Set up the optimizer pipeline. Start with registering info about how the // target lays out data structures. OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); // Do simple "peephole" optimizations and bit-twiddling optzns. OurFPM.add(createInstructionCombiningPass()); // Reassociate expressions. @@ -507,6 +509,7 @@ at runtime.

#include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetSelect.h" #include "llvm/Transforms/Scalar.h" @@ -1086,6 +1089,8 @@ int main() { // Set up the optimizer pipeline. Start with registering info about how the // target lays out data structures. OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); // Do simple "peephole" optimizations and bit-twiddling optzns. OurFPM.add(createInstructionCombiningPass()); // Reassociate expressions. @@ -1126,7 +1131,7 @@ int main() { Chris Lattner
The LLVM Compiler Infrastructure
- Last modified: $Date: 2010-06-14 08:09:39 +0200 (Mon, 14 Jun 2010) $ + Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $ diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html index 4450f2e3a11a..d2c3bd03dc4e 100644 --- a/docs/tutorial/LangImpl5.html +++ b/docs/tutorial/LangImpl5.html @@ -907,6 +907,7 @@ if/then/else and for expressions.. To build this example, use: #include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetSelect.h" #include "llvm/Transforms/Scalar.h" @@ -1731,6 +1732,8 @@ int main() { // Set up the optimizer pipeline. Start with registering info about how the // target lays out data structures. OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); // Do simple "peephole" optimizations and bit-twiddling optzns. OurFPM.add(createInstructionCombiningPass()); // Reassociate expressions. @@ -1771,7 +1774,7 @@ int main() { Chris Lattner
The LLVM Compiler Infrastructure
- Last modified: $Date: 2010-09-01 22:09:20 +0200 (Wed, 01 Sep 2010) $ + Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $ diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html index c6a0b8a7d603..7ddf3a099cbc 100644 --- a/docs/tutorial/LangImpl6.html +++ b/docs/tutorial/LangImpl6.html @@ -826,6 +826,7 @@ if/then/else and for expressions.. To build this example, use: #include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetSelect.h" #include "llvm/Transforms/Scalar.h" @@ -1768,6 +1769,8 @@ int main() { // Set up the optimizer pipeline. Start with registering info about how the // target lays out data structures. OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); // Do simple "peephole" optimizations and bit-twiddling optzns. OurFPM.add(createInstructionCombiningPass()); // Reassociate expressions. @@ -1808,7 +1811,7 @@ int main() { Chris Lattner
The LLVM Compiler Infrastructure
- Last modified: $Date: 2010-09-01 22:09:20 +0200 (Wed, 01 Sep 2010) $ + Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $ diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html index 1ec99b15bf5c..3b36129d6716 100644 --- a/docs/tutorial/LangImpl7.html +++ b/docs/tutorial/LangImpl7.html @@ -1009,6 +1009,7 @@ variables and var/in support. To build this example, use: #include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetSelect.h" #include "llvm/Transforms/Scalar.h" @@ -2116,6 +2117,8 @@ int main() { // Set up the optimizer pipeline. Start with registering info about how the // target lays out data structures. OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); // Promote allocas to registers. OurFPM.add(createPromoteMemoryToRegisterPass()); // Do simple "peephole" optimizations and bit-twiddling optzns. @@ -2158,7 +2161,7 @@ int main() { Chris Lattner
The LLVM Compiler Infrastructure
- Last modified: $Date: 2010-09-01 22:09:20 +0200 (Wed, 01 Sep 2010) $ + Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $ diff --git a/docs/tutorial/OCamlLangImpl7.html b/docs/tutorial/OCamlLangImpl7.html index ac31fbfc0766..a9fcd704cf8b 100644 --- a/docs/tutorial/OCamlLangImpl7.html +++ b/docs/tutorial/OCamlLangImpl7.html @@ -30,7 +30,7 @@
  • Full Code Listing
  • -
  • Chapter 8: Conclusion and other useful LLVM +
  • Chapter 8: Conclusion and other useful LLVM tidbits
  • @@ -1901,7 +1901,7 @@ extern double printd(double X) { Chris Lattner
    The LLVM Compiler Infrastructure
    Erick Tryzelaar
    - Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $ + Last modified: $Date: 2011-01-01 04:27:43 +0100 (Sat, 01 Jan 2011) $ diff --git a/docs/tutorial/OCamlLangImpl8.html b/docs/tutorial/OCamlLangImpl8.html new file mode 100644 index 000000000000..64a62002c4cc --- /dev/null +++ b/docs/tutorial/OCamlLangImpl8.html @@ -0,0 +1,365 @@ + + + + + Kaleidoscope: Conclusion and other useful LLVM tidbits + + + + + + + +
    Kaleidoscope: Conclusion and other useful LLVM + tidbits
    + + + + +
    +

    Written by Chris Lattner

    +
    + + +
    Tutorial Conclusion
    + + +
    + +

    Welcome to the final chapter of the "Implementing a +language with LLVM" tutorial. In the course of this tutorial, we have grown +our little Kaleidoscope language from being a useless toy, to being a +semi-interesting (but probably still useless) toy. :)

    + +

    It is interesting to see how far we've come, and how little code it has +taken. We built the entire lexer, parser, AST, code generator, and an +interactive run-loop (with a JIT!) by-hand in under 700 lines of +(non-comment/non-blank) code.

    + +

    Our little language supports a couple of interesting features: it supports +user defined binary and unary operators, it uses JIT compilation for immediate +evaluation, and it supports a few control flow constructs with SSA construction. +

    + +

    Part of the idea of this tutorial was to show you how easy and fun it can be +to define, build, and play with languages. Building a compiler need not be a +scary or mystical process! Now that you've seen some of the basics, I strongly +encourage you to take the code and hack on it. For example, try adding:

    + + + +

    +Have fun - try doing something crazy and unusual. Building a language like +everyone else always has is much less fun than trying something a little crazy +or off the wall and seeing how it turns out. If you get stuck or want to talk +about it, feel free to email the llvmdev mailing +list: it has lots of people who are interested in languages and are often +willing to help out. +

    + +

    Before we end this tutorial, I want to talk about some "tips and tricks" for generating +LLVM IR. These are some of the more subtle things that may not be obvious, but +are very useful if you want to take advantage of LLVM's capabilities.

    + +
    + + +
    Properties of the LLVM +IR
    + + +
    + +

    We have a couple of common questions about code in the LLVM IR form - let's just +get these out of the way right now, shall we?

    + +
    + + +
    Target +Independence
    + + +
    + +

    Kaleidoscope is an example of a "portable language": any program written in +Kaleidoscope will work the same way on any target that it runs on. Many other +languages have this property, e.g. lisp, java, haskell, javascript, python, etc +(note that while these languages are portable, not all their libraries are).

    + +

    One nice aspect of LLVM is that it is often capable of preserving target +independence in the IR: you can take the LLVM IR for a Kaleidoscope-compiled +program and run it on any target that LLVM supports, even emitting C code and +compiling that on targets that LLVM doesn't support natively. You can trivially +tell that the Kaleidoscope compiler generates target-independent code because it +never queries for any target-specific information when generating code.

    + +

    The fact that LLVM provides a compact, target-independent representation for +code gets a lot of people excited. Unfortunately, these people are usually +thinking about C or a language from the C family when they are asking questions +about language portability. I say "unfortunately", because there is really no +way to make (fully general) C code portable, other than shipping the source code +around (and of course, C source code is not actually portable in general +either - ever port a really old application from 32- to 64-bits?).

    + +

    The problem with C (again, in its full generality) is that it is heavily +laden with target specific assumptions. As one simple example, the preprocessor +often destructively removes target-independence from the code when it processes +the input text:

    + +
    +
    +#ifdef __i386__
    +  int X = 1;
    +#else
    +  int X = 42;
    +#endif
    +
    +
    + +

    While it is possible to engineer more and more complex solutions to problems +like this, it cannot be solved in full generality in a way that is better than shipping +the actual source code.

    + +

    That said, there are interesting subsets of C that can be made portable. If +you are willing to fix primitive types to a fixed size (say int = 32-bits, +and long = 64-bits), don't care about ABI compatibility with existing binaries, +and are willing to give up some other minor features, you can have portable +code. This can make sense for specialized domains such as an +in-kernel language.

    + +
    + + +
    Safety Guarantees
    + + +
    + +

    Many of the languages above are also "safe" languages: it is impossible for +a program written in Java to corrupt its address space and crash the process +(assuming the JVM has no bugs). +Safety is an interesting property that requires a combination of language +design, runtime support, and often operating system support.

    + +

    It is certainly possible to implement a safe language in LLVM, but LLVM IR +does not itself guarantee safety. The LLVM IR allows unsafe pointer casts, +use after free bugs, buffer over-runs, and a variety of other problems. Safety +needs to be implemented as a layer on top of LLVM and, conveniently, several +groups have investigated this. Ask on the llvmdev mailing +list if you are interested in more details.

    + +
    + + +
    Language-Specific +Optimizations
    + + +
    + +

    One thing about LLVM that turns off many people is that it does not solve all +the world's problems in one system (sorry 'world hunger', someone else will have +to solve you some other day). One specific complaint is that people perceive +LLVM as being incapable of performing high-level language-specific optimization: +LLVM "loses too much information".

    + +

    Unfortunately, this is really not the place to give you a full and unified +version of "Chris Lattner's theory of compiler design". Instead, I'll make a +few observations:

    + +

    First, you're right that LLVM does lose information. For example, as of this +writing, there is no way to distinguish in the LLVM IR whether an SSA-value came +from a C "int" or a C "long" on an ILP32 machine (other than debug info). Both +get compiled down to an 'i32' value and the information about what it came from +is lost. The more general issue here is that the LLVM type system uses +"structural equivalence" instead of "name equivalence". Another place this +surprises people is if you have two types in a high-level language that have the +same structure (e.g. two different structs that have a single int field): these +types will compile down into a single LLVM type and it will be impossible to +tell what it came from.

    + +

    Second, while LLVM does lose information, LLVM is not a fixed target: we +continue to enhance and improve it in many different ways. In addition to +adding new features (LLVM did not always support exceptions or debug info), we +also extend the IR to capture important information for optimization (e.g. +whether an argument is sign or zero extended, information about pointers +aliasing, etc). Many of the enhancements are user-driven: people want LLVM to +include some specific feature, so they go ahead and extend it.

    + +

    Third, it is possible and easy to add language-specific +optimizations, and you have a number of choices in how to do it. As one trivial +example, it is easy to add language-specific optimization passes that +"know" things about code compiled for a language. In the case of the C family, +there is an optimization pass that "knows" about the standard C library +functions. If you call "exit(0)" in main(), it knows that it is safe to +optimize that into "return 0;" because C specifies what the 'exit' +function does.

    + +

    In addition to simple library knowledge, it is possible to embed a variety of +other language-specific information into the LLVM IR. If you have a specific +need and run into a wall, please bring the topic up on the llvmdev list. At the +very worst, you can always treat LLVM as if it were a "dumb code generator" and +implement the high-level optimizations you desire in your front-end, on the +language-specific AST. +

    + +
    + + +
    Tips and Tricks
    + + +
    + +

    There is a variety of useful tips and tricks that you come to know after +working on/with LLVM that aren't obvious at first glance. Instead of letting +everyone rediscover them, this section talks about some of these issues.

    + +
    + + +
    Implementing portable +offsetof/sizeof
    + + +
    + +

    One interesting thing that comes up, if you are trying to keep the code +generated by your compiler "target independent", is that you often need to know +the size of some LLVM type or the offset of some field in an LLVM structure. +For example, you might need to pass the size of a type into a function that +allocates memory.

    + +

    Unfortunately, this can vary widely across targets: for example the width of +a pointer is trivially target-specific. However, there is a clever +way to use the getelementptr instruction that allows you to compute this +in a portable way.

    + +
    + + +
    Garbage Collected +Stack Frames
    + + +
    + +

    Some languages want to explicitly manage their stack frames, often so that +they are garbage collected or to allow easy implementation of closures. There +are often better ways to implement these features than explicit stack frames, +but LLVM +does support them, if you want. It requires your front-end to convert the +code into Continuation +Passing Style and the use of tail calls (which LLVM also supports).

    + +
    + + +
    +
    + Valid CSS! + Valid HTML 4.01! + + Chris Lattner
    + The LLVM Compiler Infrastructure
    + Last modified: $Date$ +
    + + diff --git a/docs/tutorial/index.html b/docs/tutorial/index.html index 250b533f3f8a..11dd5e2d732a 100644 --- a/docs/tutorial/index.html +++ b/docs/tutorial/index.html @@ -35,7 +35,7 @@
  • Extending the language: control flow
  • Extending the language: user-defined operators
  • Extending the language: mutable variables / SSA construction
  • -
  • Conclusion and other useful LLVM tidbits
  • +
  • Conclusion and other useful LLVM tidbits
  • Advanced Topics
      -- cgit v1.3