Regex
- A sequence of characters that forms a search pattern
- can be a single character, or a more complicated pattern
- can be used to perform all types of text search and text replace operations
package com.oop.regex;
// ebook hlm 510
/**
 * Contrasts {@link String#equals(Object)} (literal comparison) with
 * {@link String#matches(String)} (the whole string must match a regular expression).
 */
public class Example1 {
    /**
     * Prints the result of several equals/matches checks to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String s1 = "Java";
        // pattern matching: checking whether one string can be found in / fits another string
        System.out.println(s1+" equals Java : "+s1.equals("Java"));
        System.out.println(s1+" matches Java : "+s1.matches("Java"));
        // equals  : the parameter is a plain string
        // matches : the parameter is a regex (regular expression)
        String regex1 = "J[a-z][a-z][a-z]";
        // The trailing * is required: without it the pattern would only accept
        // two-character strings and could not illustrate "0 or more occurrences".
        String regex2 = "J[a-z]*";
        String regex3 = "j[a-z]*";
        System.out.println(s1+" matches "+regex1+": "+s1.matches(regex1));
        System.out.println(s1+" matches "+regex2+": "+s1.matches(regex2));
        System.out.println(s1+" matches "+regex3+": "+s1.matches(regex3));
        /*
         * [a-z]            : a single lowercase letter, i.e. a, b, c, d, ... or z
         * J[a-z][a-z][a-z] : a string of exactly four characters; the first is J,
         *                    each of the next three is a lowercase letter
         * J[a-z]*          : J followed by any number of lowercase letters;
         *                    * means 0 or more occurrences of the preceding expression
         * j[a-z]*          : same, but starting with a lowercase j, so "Java" does not match
         */
        String s2 = "3";
        String regex4 = "[0123]";
        System.out.println(s2+" matches "+regex4+": "+s2.matches(regex4));
        // [0123] : a single digit 0, 1, 2, or 3
    }
}
Common regex syntax :
Subexpression
|
Matches
|
|
1
|
^ | Matches the beginning of the line |
2
|
$ | Matches the end of the line |
3
|
[...] | Matches any single character in the brackets |
4
|
a| b | Matches either a or b |
5
|
re* | Matches 0 or more occurrences of the preceding expression |
6
|
re+ | Matches 1 or more occurrences of the preceding expression |
Regex syntax (set) :
Subexpression
|
Matches
|
|
1
|
[abc] | Set definition, can match the letter a or b or c. |
2
|
[abc][vz] | Set definition, can match a or b or c followed by either v or z. |
3
|
[^abc] | When a caret appears as the first character inside square brackets, it negates the pattern. This pattern matches any character except a or b or c. |
4
|
[a-d1-7] | Ranges: matches a letter between a and d and figures from 1 to 7, but not d1. |
Meta character :
Subexpression
|
Matches
|
|
1
|
\d | Any digit, short for [0-9] |
2
|
\D | A non-digit, short for [^0-9] |
3
|
\s | A whitespace character, short for [ \t\n\x0b\r\f] |
4
|
\S | A non-whitespace character, short for [^\s] |
5
|
\w | A word character, short for [a-zA-Z_0-9] |
6
|
\W | A non-word character [^\w] |
7
|
\S+ | Several non-whitespace characters |
8
|
\b | Matches a word boundary where a word character is [a-zA-Z0-9_] |
Quantifier :
* |
Occurs zero or more times, is short for {0,} |
|
+
|
Occurs one or more times, is short for {1,} |
|
?
|
Occurs zero or one time, is short for {0,1} |
|
{X} |
Occurs exactly X times |
|
grouping :
- group parts of regex
- using round brackets ()
- with $ you can refer to a group (e.g. in the replacement text)
- $1 is the first group
- $2 is the second group
package com.oop.regex;
/**
 * Reformats phone numbers with {@link String#replaceAll(String, String)},
 * using capture groups in the regex and {@code $n} group references in the
 * replacement text.
 */
public class Example2 {
    /**
     * Prints a formatted mobile number and a formatted office number.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // formatting phone number
        // mobile phone number, separate each 4 digits
        // Backslashes must be doubled inside a Java string literal: "\d" is an
        // illegal escape and "\b" would be a backspace, not a word boundary.
        String regex = "\\b(\\d{4})(\\d{4})(\\d{4})\\b";
        String replacementText = "$1-$2-$3";
        System.out.println("085645123123".replaceAll(regex, replacementText));
        // office phone number, separate the area code from the phone number;
        // group 1 (the leading 0) is dropped and replaced by the +62 country code
        regex = "\\b(\\d{1})(\\d{3})(\\d{6})\\b";
        replacementText = "(+62$2)-$3";
        System.out.println("0271646999".replaceAll(regex, replacementText));
    }
}
package com.oop.regex;
/**
 * Shows three regex-based {@link String} helpers on one sample sentence:
 * {@code matches}, {@code split} and {@code replaceAll}.
 */
public class Example3 {
    /**
     * Prints the match result, the number of tokens, each token on its own
     * line, and finally the sentence with whitespace runs turned into tabs.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String sentence = "This is my small example "
                + "string which I'm going to " + "use for pattern matching.";

        // \w is a word character [a-zA-Z_0-9]; ".*" accepts the rest of the string
        boolean startsWithWordChar = sentence.matches("\\w.*");
        System.out.println(startsWithWordChar);

        // \s+ is a run of whitespace characters [ \t\n\x0b\r\f]
        String[] tokens = sentence.split("\\s+");
        int tokenCount = tokens.length; // 14 for the sentence above
        System.out.println(tokenCount);
        for (int i = 0; i < tokenCount; i++) {
            System.out.println(tokens[i]);
        }

        // collapse every whitespace run into a single tab
        String tabbed = sentence.replaceAll("\\s+", "\t");
        System.out.println(tabbed);
    }
}
Pattern and Matcher :
- Advanced regular expressions
- java.util.regex
- create a Pattern object that defines the regular expression
- the Pattern object lets you create a Matcher object for a given string
- the Matcher object then lets you perform regex operations on that string
package com.oop.regex;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates {@link Pattern} and {@link Matcher}: iterating over every match
 * of a regex in a string, and replacing all matches of another regex.
 */
public class Example4 {
    /**
     * Prints the start index, end index and text of every {@code \w+} match in
     * the sample sentence, then the sentence with whitespace runs replaced by tabs.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String EXAMPLE_TEST = "This is my small example string which I'm going to use "
                + "for pattern matching.";
        // Backslashes must be doubled in a Java string literal; "\w" on its own
        // is an illegal escape sequence and does not compile.
        Pattern pattern = Pattern.compile("\\w+");
        // in case you would like to ignore case sensitivity,
        // you could use this statement:
        // Pattern pattern = Pattern.compile("\\s+", Pattern.CASE_INSENSITIVE);
        Matcher matcher = pattern.matcher(EXAMPLE_TEST);
        // check all occurrences
        while (matcher.find()) {
            System.out.print("Start index: " + matcher.start());
            System.out.print(", End index: " + matcher.end() + " --> ");
            System.out.println(matcher.group());
        }
        // now create a new pattern and matcher to replace whitespace with tabs
        Pattern replace = Pattern.compile("\\s+");
        Matcher matcher2 = replace.matcher(EXAMPLE_TEST);
        System.out.println(matcher2.replaceAll("\t"));
    }
}
Praktikum 7 :
// Tugas praktikum 7
// Tulis sebuah program Java dengan input NIM mahasiswa FMIPA UNS
// Gunakan regular expression untuk mengekstrak asal program studi dan tahun angkatan mahasiswa berdasarkan NIM yang diinputkan!
// Misal
// M0120001 -> Matematika angkatan 2020
// M0220001 -> Fisika angkatan 2020
// M0317001 -> Kimia angkatan 2017
// M0418001 -> Biologi angkatan 2018
// M0519001 -> Informatika angkatan 2019
// M0616001 -> Farmasi angkatan 2016
// M0719001 -> Statistika angkatan 2019
// M0819001 -> Ilmu Lingkungan angkatan 2019
// G0017075 -> Bukan mahasiswa FMIPA UNS
/**
 * Practicum 7: derives the study programme and intake year of an FMIPA UNS
 * student from the student number (NIM) using a regular expression.
 *
 * <p>Accepted NIM layout (8 characters): {@code M0}, one programme digit
 * (1-8), a two-digit intake year, and a three-digit sequence number,
 * e.g. {@code M0519064}.
 */
public class Prak7 {

    /** Whole-NIM pattern; group 1 is the programme digit, group 2 the two-digit intake year. */
    private static final java.util.regex.Pattern NIM_PATTERN =
            java.util.regex.Pattern.compile("M0([1-8])([0-9]{2})[0-9]{3}");

    /** Programme names, indexed by (programme digit - 1). */
    private static final String[] PROGRAMS = {
        "Matematika",
        "Fisika",
        "Kimia",
        "Biologi",
        "Informatika",
        "Farmasi",
        "Statistika",
        "Ilmu Lingkungan",
    };

    /** Message printed for anything that is not a valid FMIPA NIM. */
    private static final String NOT_FMIPA = "Bukan mahasiswa FMIPA UNS";

    /**
     * Prints the programme and intake year encoded in {@code nim}, or a
     * "not an FMIPA UNS student" message when the NIM does not fit the layout.
     *
     * @param nim the student number to inspect; {@code null} is reported as
     *            not belonging to FMIPA
     */
    public static void cek_jurusan(String nim) {
        System.out.println(describe(nim));
    }

    /** Builds the one-line description for {@code nim} without printing it. */
    private static String describe(String nim) {
        if (nim == null) {
            return NOT_FMIPA;
        }
        java.util.regex.Matcher m = NIM_PATTERN.matcher(nim);
        if (!m.matches()) {
            return NOT_FMIPA;
        }
        // group 1 is a single character '1'..'8'; map it to a 0-based index
        int programIndex = m.group(1).charAt(0) - '1';
        // all intake years are assumed to be in the 2000s, as in the original exercise
        return PROGRAMS[programIndex] + " angkatan 20" + m.group(2);
    }

    /**
     * Runs the check on a sample NIM.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String nim = "M0519064";
        cek_jurusan(nim);
    }
}