• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdepimlibs-4.14.10 API Reference
  • KDE Home
  • Contact Us
 

kpimutils

  • kpimutils
linklocator.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2002 Dave Corrie <kde@davecorrie.com>
3 
4  This library is free software; you can redistribute it and/or
5  modify it under the terms of the GNU Library General Public
6  License as published by the Free Software Foundation; either
7  version 2 of the License, or (at your option) any later version.
8 
9  This library is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  Library General Public License for more details.
13 
14  You should have received a copy of the GNU Library General Public License
15  along with this library; see the file COPYING.LIB. If not, write to
16  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17  Boston, MA 02110-1301, USA.
18 */
29 #include "linklocator.h"
30 
31 #include <KEmoticons>
32 
33 #include <QtCore/QCoreApplication>
34 #include <QtCore/QFile>
35 #include <QtCore/QRegExp>
36 #include <QTextDocument>
37 
38 #include <climits>
39 
40 using namespace KPIMUtils;
41 
46 //@cond PRIVATE
47 class KPIMUtils::LinkLocator::Private
48 {
49  public:
50  int mMaxUrlLen;
51  int mMaxAddressLen;
52 };
53 //@endcond
54 
55 // Use a static for this as calls to the KEmoticons constructor are expensive.
// The only user in this file is convertToHtml(), which dereferences it as
// sEmoticons->theme( ... ) when the ReplaceSmileys flag is set.
56 K_GLOBAL_STATIC( KEmoticons, sEmoticons )
57 
58 LinkLocator::LinkLocator( const QString &text, int pos )
59  : mText( text ), mPos( pos ), d( new KPIMUtils::LinkLocator::Private )
60 {
61  d->mMaxUrlLen = 4096;
62  d->mMaxAddressLen = 255;
63 
64  // If you change either of the above values for maxUrlLen or
65  // maxAddressLen, then please also update the documentation for
66  // setMaxUrlLen()/setMaxAddressLen() in the header file AND the
67  // default values used for the maxUrlLen/maxAddressLen parameters
68  // of convertToHtml().
69 }
70 
71 LinkLocator::~LinkLocator()
72 {
73  delete d;
74 }
75 
76 void LinkLocator::setMaxUrlLen( int length )
77 {
78  d->mMaxUrlLen = length;
79 }
80 
81 int LinkLocator::maxUrlLen() const
82 {
83  return d->mMaxUrlLen;
84 }
85 
86 void LinkLocator::setMaxAddressLen( int length )
87 {
88  d->mMaxAddressLen = length;
89 }
90 
91 int LinkLocator::maxAddressLen() const
92 {
93  return d->mMaxAddressLen;
94 }
95 
96 QString LinkLocator::getUrl()
97 {
98  return getUrlAndCheckValidHref();
99 }
100 
101 
102 QString LinkLocator::getUrlAndCheckValidHref(bool *badurl)
103 {
104  QString url;
105  if ( atUrl() ) {
106  // NOTE: see http://tools.ietf.org/html/rfc3986#appendix-A and especially appendix-C
107  // Appendix-C mainly says, that when extracting URLs from plain text, line breaks shall
108  // be allowed and should be ignored when the URI is extracted.
109 
110  // This implementation follows this recommendation and
111  // allows the URL to be enclosed within different kind of brackets/quotes
112  // If an URL is enclosed, whitespace characters are allowed and removed, otherwise
113  // the URL ends with the first whitespace
114  // Also, if the URL is enclosed in brackets, the URL itself is not allowed
115  // to contain the closing bracket, as this would be detected as the end of the URL
116 
117  QChar beforeUrl, afterUrl;
118 
119  // detect if the url has been surrounded by brackets or quotes
120  if ( mPos > 0 ) {
121  beforeUrl = mText[mPos - 1];
122 
123  /*if ( beforeUrl == '(' ) {
124  afterUrl = ')';
125  } else */if ( beforeUrl == QLatin1Char('[') ) {
126  afterUrl = QLatin1Char(']');
127  } else if ( beforeUrl == QLatin1Char('<') ) {
128  afterUrl = QLatin1Char('>');
129  } else if ( beforeUrl == QLatin1Char('>') ) { // for e.g. <link>http://.....</link>
130  afterUrl = QLatin1Char('<');
131  } else if ( beforeUrl == QLatin1Char('"') ) {
132  afterUrl = QLatin1Char('"');
133  }
134  }
135 
136  url.reserve( maxUrlLen() ); // avoid allocs
137  int start = mPos;
138  bool previousCharIsADoubleQuote = false;
139  while ( ( mPos < (int)mText.length() ) &&
140  ( mText[mPos].isPrint() || mText[mPos].isSpace() ) &&
141  ( ( afterUrl.isNull() && !mText[mPos].isSpace() ) ||
142  ( !afterUrl.isNull() && mText[mPos] != afterUrl ) ) ) {
143  if ( !mText[mPos].isSpace() ) { // skip whitespace
144  if (mText[mPos] == QLatin1Char('>') && previousCharIsADoubleQuote) {
145  //it's an invalid url
146  if (badurl) {
147  *badurl = true;
148  }
149  return QString();
150  }
151  if (mText[mPos] == QLatin1Char('"')) {
152  previousCharIsADoubleQuote = true;
153  } else {
154  previousCharIsADoubleQuote = false;
155  }
156  url.append( mText[mPos] );
157  if ( url.length() > maxUrlLen() ) {
158  break;
159  }
160  }
161 
162  mPos++;
163  }
164 
165  if ( isEmptyUrl( url ) || ( url.length() > maxUrlLen() ) ) {
166  mPos = start;
167  url.clear();
168  } else {
169  --mPos;
170  }
171  }
172 
173  // HACK: This is actually against the RFC. However, most people don't properly escape the URL in
174  // their text with "" or <>. That leads to people writing an url, followed immediatley by
175  // a dot to finish the sentence. That would lead the parser to include the dot in the url,
176  // even though that is not wanted. So work around that here.
177  // Most real-life URLs hopefully don't end with dots or commas.
178  QList<QChar> wordBoundaries;
179  wordBoundaries << QLatin1Char('.') << QLatin1Char(',') << QLatin1Char(':') << QLatin1Char('!') << QLatin1Char('?') << QLatin1Char(')') << QLatin1Char('>');
180  if ( url.length() > 1 ) {
181  do {
182  if ( wordBoundaries.contains( url.at( url.length() - 1 ) ) ) {
183  url.chop( 1 );
184  --mPos;
185  } else {
186  break;
187  }
188  } while( url.length() > 1 );
189  }
190 
191  return url;
192 }
193 
194 // keep this in sync with KMMainWin::slotUrlClicked()
195 bool LinkLocator::atUrl() const
196 {
197  // the following characters are allowed in a dot-atom (RFC 2822):
198  // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
199  static const QString allowedSpecialChars = QLatin1String( ".!#$%&'*+-/=?^_`{|}~" );
200 
201  // the character directly before the URL must not be a letter, a number or
202  // any other character allowed in a dot-atom (RFC 2822).
203  if ( ( mPos > 0 ) &&
204  ( mText[mPos-1].isLetterOrNumber() ||
205  ( allowedSpecialChars.indexOf( mText[mPos-1] ) != -1 ) ) ) {
206  return false;
207  }
208 
209  QChar ch = mText[mPos];
210  return
211  ( ch == QLatin1Char('h') && ( mText.mid( mPos, 7 ) == QLatin1String( "http://" ) ||
212  mText.mid( mPos, 8 ) == QLatin1String( "https://" ) ) ) ||
213  ( ch == QLatin1Char('v') && mText.mid( mPos, 6 ) == QLatin1String( "vnc://" ) ) ||
214  ( ch == QLatin1Char('f') && ( mText.mid( mPos, 7 ) == QLatin1String( "fish://" ) ||
215  mText.mid( mPos, 6 ) == QLatin1String( "ftp://" ) ||
216  mText.mid( mPos, 7 ) == QLatin1String( "ftps://" ) ) ) ||
217  ( ch == QLatin1Char('s') && ( mText.mid( mPos, 7 ) == QLatin1String( "sftp://" ) ||
218  mText.mid( mPos, 6 ) == QLatin1String( "smb://" ) ) ) ||
219  ( ch == QLatin1Char('m') && mText.mid( mPos, 7 ) == QLatin1String( "mailto:" ) ) ||
220  ( ch == QLatin1Char('w') && mText.mid( mPos, 4 ) == QLatin1String( "www." ) ) ||
221  ( ch == QLatin1Char('f') && ( mText.mid( mPos, 4 ) == QLatin1String( "ftp." ) ||
222  mText.mid( mPos, 7 ) == QLatin1String( "file://" ) ) )||
223  ( ch == QLatin1Char('n') && mText.mid( mPos, 5 ) == QLatin1String( "news:" ) );
224 }
225 
226 bool LinkLocator::isEmptyUrl( const QString &url ) const
227 {
228  return url.isEmpty() ||
229  url == QLatin1String( "http://" ) ||
230  url == QLatin1String( "https://" ) ||
231  url == QLatin1String( "fish://" ) ||
232  url == QLatin1String( "ftp://" ) ||
233  url == QLatin1String( "ftps://" ) ||
234  url == QLatin1String( "sftp://" ) ||
235  url == QLatin1String( "smb://" ) ||
236  url == QLatin1String( "vnc://" ) ||
237  url == QLatin1String( "mailto" ) ||
238  url == QLatin1String( "www" ) ||
239  url == QLatin1String( "ftp" ) ||
240  url == QLatin1String( "news" ) ||
241  url == QLatin1String( "news://" );
242 }
243 
244 QString LinkLocator::getEmailAddress()
245 {
246  QString address;
247 
248  if ( mText[mPos] == QLatin1Char('@') ) {
249  // the following characters are allowed in a dot-atom (RFC 2822):
250  // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
251  static const QString allowedSpecialChars = QLatin1String( ".!#$%&'*+-/=?^_`{|}~" );
252 
253  // determine the local part of the email address
254  int start = mPos - 1;
255  while ( start >= 0 && mText[start].unicode() < 128 &&
256  ( mText[start].isLetterOrNumber() ||
257  mText[start] == QLatin1Char('@') || // allow @ to find invalid email addresses
258  allowedSpecialChars.indexOf( mText[start] ) != -1 ) ) {
259  if ( mText[start] == QLatin1Char('@') ) {
260  return QString(); // local part contains '@' -> no email address
261  }
262  --start;
263  }
264  ++start;
265  // we assume that an email address starts with a letter or a digit
266  while ( ( start < mPos ) && !mText[start].isLetterOrNumber() ) {
267  ++start;
268  }
269  if ( start == mPos ) {
270  return QString(); // local part is empty -> no email address
271  }
272 
273  // determine the domain part of the email address
274  int dotPos = INT_MAX;
275  int end = mPos + 1;
276  while ( end < (int)mText.length() &&
277  ( mText[end].isLetterOrNumber() ||
278  mText[end] == QLatin1Char('@') || // allow @ to find invalid email addresses
279  mText[end] == QLatin1Char('.') ||
280  mText[end] == QLatin1Char('-') ) ) {
281  if ( mText[end] == QLatin1Char('@') ) {
282  return QString(); // domain part contains '@' -> no email address
283  }
284  if ( mText[end] == QLatin1Char('.') ) {
285  dotPos = qMin( dotPos, end ); // remember index of first dot in domain
286  }
287  ++end;
288  }
289  // we assume that an email address ends with a letter or a digit
290  while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() ) {
291  --end;
292  }
293  if ( end == mPos ) {
294  return QString(); // domain part is empty -> no email address
295  }
296  if ( dotPos >= end ) {
297  return QString(); // domain part doesn't contain a dot
298  }
299 
300  if ( end - start > maxAddressLen() ) {
301  return QString(); // too long -> most likely no email address
302  }
303  address = mText.mid( start, end - start );
304 
305  mPos = end - 1;
306  }
307  return address;
308 }
309 
310 QString LinkLocator::convertToHtml( const QString &plainText, int flags,
311  int maxUrlLen, int maxAddressLen )
312 {
313  LinkLocator locator( plainText );
314  locator.setMaxUrlLen( maxUrlLen );
315  locator.setMaxAddressLen( maxAddressLen );
316 
317  QString str;
318  QString result( (QChar*)0, (int)locator.mText.length() * 2 );
319  QChar ch;
320  int x;
321  bool startOfLine = true;
322 
323  for ( locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length();
324  locator.mPos++, x++ ) {
325  ch = locator.mText[locator.mPos];
326  if ( flags & PreserveSpaces ) {
327  if ( ch == QLatin1Char(' ') ) {
328  if ( locator.mPos + 1 < locator.mText.length() ) {
329  if ( locator.mText[locator.mPos + 1] != QLatin1Char(' ') ) {
330 
331  // A single space, make it breaking if not at the start or end of the line
332  const bool endOfLine = locator.mText[locator.mPos + 1] == QLatin1Char('\n');
333  if ( !startOfLine && !endOfLine ) {
334  result += QLatin1Char(' ');
335  } else {
336  result += QLatin1String("&nbsp;");
337  }
338  } else {
339 
340  // Whitespace of more than one space, make it all non-breaking
341  while ( locator.mPos < locator.mText.length() && locator.mText[locator.mPos] == QLatin1Char(' ') ) {
342  result += QLatin1String("&nbsp;");
343  locator.mPos++;
344  x++;
345  }
346 
347  // We incremented once to often, undo that
348  locator.mPos--;
349  x--;
350  }
351  } else {
352  // Last space in the text, it is non-breaking
353  result += QLatin1String("&nbsp;");
354  }
355 
356  if ( startOfLine ) {
357  startOfLine = false;
358  }
359  continue;
360  } else if ( ch == QLatin1Char('\t') ) {
361  do {
362  result += QLatin1String("&nbsp;");
363  x++;
364  } while ( ( x & 7 ) != 0 );
365  x--;
366  startOfLine = false;
367  continue;
368  }
369  }
370  if ( ch == QLatin1Char('\n') ) {
371  result += QLatin1String("<br />\n"); // Keep the \n, so apps can figure out the quoting levels correctly.
372  startOfLine = true;
373  x = -1;
374  continue;
375  }
376 
377  startOfLine = false;
378  if ( ch == QLatin1Char('&') ) {
379  result += QLatin1String("&amp;");
380  } else if ( ch == QLatin1Char('"') ) {
381  result += QLatin1String("&quot;");
382  } else if ( ch == QLatin1Char('<') ) {
383  result += QLatin1String("&lt;");
384  } else if ( ch == QLatin1Char('>') ) {
385  result += QLatin1String("&gt;");
386  } else {
387  const int start = locator.mPos;
388  if ( !( flags & IgnoreUrls ) ) {
389  bool badUrl = false;
390  str = locator.getUrlAndCheckValidHref(&badUrl);
391  if (badUrl) {
392  return locator.mText;
393  }
394 
395  if ( !str.isEmpty() ) {
396  QString hyperlink;
397  if ( str.left( 4 ) == QLatin1String("www.") ) {
398  hyperlink = QLatin1String("http://") + str;
399  } else if ( str.left( 4 ) == QLatin1String("ftp.") ) {
400  hyperlink = QLatin1String("ftp://") + str;
401  } else {
402  hyperlink = str;
403  }
404 
405  result += QLatin1String("<a href=\"") + hyperlink + QLatin1String("\">") + Qt::escape( str ) + QLatin1String("</a>");
406  x += locator.mPos - start;
407  continue;
408  }
409  str = locator.getEmailAddress();
410  if ( !str.isEmpty() ) {
411  // len is the length of the local part
412  int len = str.indexOf( QLatin1Char('@') );
413  QString localPart = str.left( len );
414 
415  // remove the local part from the result (as '&'s have been expanded to
416  // &amp; we have to take care of the 4 additional characters per '&')
417  result.truncate( result.length() -
418  len - ( localPart.count( QLatin1Char('&') ) * 4 ) );
419  x -= len;
420 
421  result += QLatin1String("<a href=\"mailto:") + str + QLatin1String("\">") + str + QLatin1String("</a>");
422  x += str.length() - 1;
423  continue;
424  }
425  }
426  if ( flags & HighlightText ) {
427  str = locator.highlightedText();
428  if ( !str.isEmpty() ) {
429  result += str;
430  x += locator.mPos - start;
431  continue;
432  }
433  }
434  result += ch;
435  }
436  }
437 
438  if ( flags & ReplaceSmileys ) {
439  QStringList exclude;
440  exclude << QLatin1String("(c)") << QLatin1String("(C)") << QLatin1String("&gt;:-(") << QLatin1String("&gt;:(") << QLatin1String("(B)") << QLatin1String("(b)") << QLatin1String("(P)") << QLatin1String("(p)");
441  exclude << QLatin1String("(O)") << QLatin1String("(o)") << QLatin1String("(D)") << QLatin1String("(d)") << QLatin1String("(E)") << QLatin1String("(e)") << QLatin1String("(K)")<< QLatin1String("(k)");
442  exclude << QLatin1String("(I)") << QLatin1String("(i)") << QLatin1String("(L)") << QLatin1String("(l)") << QLatin1String("(8)") << QLatin1String("(T)") << QLatin1String("(t)") << QLatin1String("(G)");
443  exclude << QLatin1String("(g)") << QLatin1String("(F)") << QLatin1String("(f)") << QLatin1String("(H)");
444  exclude << QLatin1String("8)") << QLatin1String("(N)") << QLatin1String("(n)") << QLatin1String("(Y)") << QLatin1String("(y)" )<< QLatin1String("(U)") << QLatin1String("(u)") << QLatin1String("(W)") << QLatin1String("(w)");
445  static QString cachedEmoticonsThemeName;
446  if ( cachedEmoticonsThemeName.isEmpty() ) {
447  cachedEmoticonsThemeName = KEmoticons::currentThemeName();
448  }
449  result =
450  sEmoticons->theme( cachedEmoticonsThemeName ).parseEmoticons(
451  result, KEmoticonsTheme::StrictParse | KEmoticonsTheme::SkipHTML, exclude );
452  }
453 
454  return result;
455 }
456 
457 QString LinkLocator::pngToDataUrl( const QString &iconPath )
458 {
459  if ( iconPath.isEmpty() ) {
460  return QString();
461  }
462 
463  QFile pngFile( iconPath );
464  if ( !pngFile.open( QIODevice::ReadOnly | QIODevice::Unbuffered ) ) {
465  return QString();
466  }
467 
468  QByteArray ba = pngFile.readAll();
469  pngFile.close();
470  return QString::fromLatin1( "data:image/png;base64,%1" ).arg( QLatin1String(ba.toBase64().constData()) );
471 }
472 
473 QString LinkLocator::highlightedText()
474 {
475  // formating symbols must be prepended with a whitespace
476  if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() ) {
477  return QString();
478  }
479 
480  const QChar ch = mText[mPos];
481  if ( ch != QLatin1Char('/') && ch != QLatin1Char('*') && ch != QLatin1Char('_') && ch != QLatin1Char('-') ) {
482  return QString();
483  }
484 
485  QRegExp re =
486  QRegExp( QString::fromLatin1( "\\%1((\\w+)([\\s-']\\w+)*( ?[,.:\\?!;])?)\\%2" ).arg( ch ).arg( ch ) );
487  re.setMinimal( true );
488  if ( re.indexIn( mText, mPos ) == mPos ) {
489  int length = re.matchedLength();
490  // there must be a whitespace after the closing formating symbol
491  if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() ) {
492  return QString();
493  }
494  mPos += length - 1;
495  switch ( ch.toLatin1() ) {
496  case '*':
497  return QLatin1String("<b>*") + re.cap( 1 ) + QLatin1String("*</b>");
498  case '_':
499  return QLatin1String("<u>_") + re.cap( 1 ) + QLatin1String("_</u>");
500  case '/':
501  return QLatin1String("<i>/") + re.cap( 1 ) + QLatin1String("/</i>");
502  case '-':
503  return QLatin1String("<strike>-") + re.cap( 1 ) + QLatin1String("-</strike>");
504  }
505  }
506  return QString();
507 }
KPIMUtils::LinkLocator::pngToDataUrl
static QString pngToDataUrl(const QString &iconPath)
Embeds the given PNG image into a data URL.
Definition: linklocator.cpp:457
KPIMUtils::LinkLocator::mPos
int mPos
The current scan position.
Definition: linklocator.h:168
KPIMUtils::LinkLocator::maxUrlLen
int maxUrlLen() const
Returns the current limit on the maximum length of a URL.
Definition: linklocator.cpp:81
KPIMUtils::LinkLocator::~LinkLocator
~LinkLocator()
Destructor.
Definition: linklocator.cpp:71
KPIMUtils::LinkLocator::mText
QString mText
The plaintext string being scanned for URLs and email addresses.
Definition: linklocator.h:163
KPIMUtils
Definition: email.h:42
KPIMUtils::LinkLocator::getUrl
QString getUrl()
Attempts to grab a URL starting at the current scan position.
Definition: linklocator.cpp:96
KPIMUtils::LinkLocator::setMaxAddressLen
void setMaxAddressLen(int length)
Sets the maximum length of email addresses that will be matched by getEmailAddress().
Definition: linklocator.cpp:86
KPIMUtils::LinkLocator::highlightedText
QString highlightedText()
Highlight text according to *bold*, /italic/, _underlined_ and -struck through- markup.
Definition: linklocator.cpp:473
KPIMUtils::LinkLocator::getEmailAddress
QString getEmailAddress()
Attempts to grab an email address.
Definition: linklocator.cpp:244
KPIMUtils::LinkLocator
LinkLocator assists in identifying sections of text that can usefully be converted into hyperlinks in HTML.
Definition: linklocator.h:48
KPIMUtils::LinkLocator::setMaxUrlLen
void setMaxUrlLen(int length)
Sets the maximum length of URLs that will be matched by getUrl().
Definition: linklocator.cpp:76
KPIMUtils::LinkLocator::maxAddressLen
int maxAddressLen() const
Returns the current limit on the maximum length of an email address.
Definition: linklocator.cpp:91
linklocator.h
This file is part of the KDEPIM Utilities library and provides the LinkLocator class.
KPIMUtils::LinkLocator::convertToHtml
static QString convertToHtml(const QString &plainText, int flags=0, int maxUrlLen=4096, int maxAddressLen=255)
Converts plaintext into html.
Definition: linklocator.cpp:310
This file is part of the KDE documentation.
Documentation copyright © 1996-2017 The KDE developers.
Generated on Mon Aug 7 2017 09:24:33 by doxygen 1.8.13 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

kpimutils

Skip menu "kpimutils"
  • Main Page
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • Modules
  • Related Pages

kdepimlibs-4.14.10 API Reference

Skip menu "kdepimlibs-4.14.10 API Reference"
  • akonadi
  •   contact
  •   kmime
  •   socialutils
  • kabc
  • kalarmcal
  • kblog
  • kcal
  • kcalcore
  • kcalutils
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmbox
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal